Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH V2 7/8] ARM: dts: stm32: add support of ethernet on stm32mp157c-ev1
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

MAC is connected to a PHY in RGMII mode.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 arch/arm/boot/dts/stm32mp157c-ev1.dts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts
index 57e6dbc..a7fee5c 100644
--- a/arch/arm/boot/dts/stm32mp157c-ev1.dts
+++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts
@@ -17,5 +17,25 @@
 
 	aliases {
 		serial0 = &uart4;
+		ethernet0 = &ethernet0;
+	};
+};
+
+&ethernet0 {
+	status = "okay";
+	pinctrl-0 = <&ethernet0_rgmii_pins_a>;
+	pinctrl-1 = <&ethernet0_rgmii_pins_sleep_a>;
+	pinctrl-names = "default", "sleep";
+	phy-mode = "rgmii";
+	max-speed = <1000>;
+	phy-handle = <&phy0>;
+
+	mdio0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "snps,dwmac-mdio";
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
 	};
 };
-- 
1.9.1

^ permalink raw reply related

* [PATCH V2 1/8] net: ethernet: stmmac: add adaptation for stm32mp157c.
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

Add glue code to support the stm32mp157c device while staying
compatible with the stm32 MCU family.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 270 ++++++++++++++++++++--
 1 file changed, 255 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 9e6db16..f51e327 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -16,49 +16,183 @@
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/stmmac.h>
 
 #include "stmmac_platform.h"
 
-#define MII_PHY_SEL_MASK	BIT(23)
+#define SYSCFG_MCU_ETH_MASK		BIT(23)
+#define SYSCFG_MP1_ETH_MASK		GENMASK(23, 16)
+
+#define SYSCFG_PMCR_ETH_CLK_SEL		BIT(16)
+#define SYSCFG_PMCR_ETH_REF_CLK_SEL	BIT(17)
+#define SYSCFG_PMCR_ETH_SEL_MII		BIT(20)
+#define SYSCFG_PMCR_ETH_SEL_RGMII	BIT(21)
+#define SYSCFG_PMCR_ETH_SEL_RMII	BIT(23)
+#define SYSCFG_PMCR_ETH_SEL_GMII	0
+#define SYSCFG_MCU_ETH_SEL_MII		0
+#define SYSCFG_MCU_ETH_SEL_RMII		1
 
 struct stm32_dwmac {
 	struct clk *clk_tx;
 	struct clk *clk_rx;
+	struct clk *clk_eth_ck;
+	struct clk *clk_ethstp;
+	struct clk *syscfg_clk;
+	bool int_phyclk;	/* Clock from RCC to drive PHY */
 	u32 mode_reg;		/* MAC glue-logic mode register */
 	struct regmap *regmap;
 	u32 speed;
+	const struct stm32_ops *ops;
+	struct device *dev;
+};
+
+struct stm32_ops {
+	int (*set_mode)(struct plat_stmmacenet_data *plat_dat);
+	int (*clk_prepare)(struct stm32_dwmac *dwmac, bool prepare);
+	int (*suspend)(struct stm32_dwmac *dwmac);
+	void (*resume)(struct stm32_dwmac *dwmac);
+	int (*parse_data)(struct stm32_dwmac *dwmac,
+			  struct device *dev);
+	u32 syscfg_eth_mask;
 };
 
 static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat)
 {
 	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
-	u32 reg = dwmac->mode_reg;
-	u32 val;
 	int ret;
 
-	val = (plat_dat->interface == PHY_INTERFACE_MODE_MII) ? 0 : 1;
-	ret = regmap_update_bits(dwmac->regmap, reg, MII_PHY_SEL_MASK, val);
-	if (ret)
-		return ret;
+	if (dwmac->ops->set_mode) {
+		ret = dwmac->ops->set_mode(plat_dat);
+		if (ret)
+			return ret;
+	}
 
 	ret = clk_prepare_enable(dwmac->clk_tx);
 	if (ret)
 		return ret;
 
-	ret = clk_prepare_enable(dwmac->clk_rx);
-	if (ret)
-		clk_disable_unprepare(dwmac->clk_tx);
+	if (!dwmac->dev->power.is_suspended) {
+		ret = clk_prepare_enable(dwmac->clk_rx);
+		if (ret) {
+			clk_disable_unprepare(dwmac->clk_tx);
+			return ret;
+		}
+	}
+
+	if (dwmac->ops->clk_prepare) {
+		ret = dwmac->ops->clk_prepare(dwmac, true);
+		if (ret) {
+			clk_disable_unprepare(dwmac->clk_rx);
+			clk_disable_unprepare(dwmac->clk_tx);
+		}
+	}
 
 	return ret;
 }
 
+static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare)
+{
+	int ret = 0;
+
+	if (prepare) {
+		ret = clk_prepare_enable(dwmac->syscfg_clk);
+		if (ret)
+			return ret;
+
+		if (dwmac->int_phyclk) {
+			ret = clk_prepare_enable(dwmac->clk_eth_ck);
+			if (ret) {
+				clk_disable_unprepare(dwmac->syscfg_clk);
+				return ret;
+			}
+		}
+	} else {
+		clk_disable_unprepare(dwmac->syscfg_clk);
+		if (dwmac->int_phyclk)
+			clk_disable_unprepare(dwmac->clk_eth_ck);
+	}
+	return ret;
+}
+
+static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
+	u32 reg = dwmac->mode_reg;
+	int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		val = SYSCFG_PMCR_ETH_SEL_MII;
+		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_MII\n");
+		break;
+	case PHY_INTERFACE_MODE_GMII:
+		val = SYSCFG_PMCR_ETH_SEL_GMII;
+		if (dwmac->int_phyclk)
+			val |= SYSCFG_PMCR_ETH_CLK_SEL;
+		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_GMII\n");
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		val = SYSCFG_PMCR_ETH_SEL_RMII;
+		if (dwmac->int_phyclk)
+			val |= SYSCFG_PMCR_ETH_REF_CLK_SEL;
+		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RMII\n");
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		val = SYSCFG_PMCR_ETH_SEL_RGMII;
+		if (dwmac->int_phyclk)
+			val |= SYSCFG_PMCR_ETH_CLK_SEL;
+		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
+		break;
+	default:
+		pr_debug("SYSCFG init :  Do not manage %d interface\n",
+			 plat_dat->interface);
+		/* Do not manage others interfaces */
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(dwmac->regmap, reg,
+				 dwmac->ops->syscfg_eth_mask, val);
+}
+
+static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
+	u32 reg = dwmac->mode_reg;
+	int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		val = SYSCFG_MCU_ETH_SEL_MII;
+		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_MII\n");
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		val = SYSCFG_MCU_ETH_SEL_RMII;
+		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RMII\n");
+		break;
+	default:
+		pr_debug("SYSCFG init :  Do not manage %d interface\n",
+			 plat_dat->interface);
+		/* Do not manage others interfaces */
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(dwmac->regmap, reg,
+				 dwmac->ops->syscfg_eth_mask, val);
+}
+
 static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac)
 {
 	clk_disable_unprepare(dwmac->clk_tx);
 	clk_disable_unprepare(dwmac->clk_rx);
+
+	if (dwmac->ops->clk_prepare)
+		dwmac->ops->clk_prepare(dwmac, false);
 }
 
 static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
@@ -70,15 +204,22 @@ static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
 	/*  Get TX/RX clocks */
 	dwmac->clk_tx = devm_clk_get(dev, "mac-clk-tx");
 	if (IS_ERR(dwmac->clk_tx)) {
-		dev_err(dev, "No tx clock provided...\n");
+		dev_err(dev, "No ETH Tx clock provided...\n");
 		return PTR_ERR(dwmac->clk_tx);
 	}
+
 	dwmac->clk_rx = devm_clk_get(dev, "mac-clk-rx");
 	if (IS_ERR(dwmac->clk_rx)) {
-		dev_err(dev, "No rx clock provided...\n");
+		dev_err(dev, "No ETH Rx clock provided...\n");
 		return PTR_ERR(dwmac->clk_rx);
 	}
 
+	if (dwmac->ops->parse_data) {
+		err = dwmac->ops->parse_data(dwmac, dev);
+		if (err)
+			return err;
+	}
+
 	/* Get mode register */
 	dwmac->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon");
 	if (IS_ERR(dwmac->regmap))
@@ -91,11 +232,46 @@ static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
 	return err;
 }
 
+static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
+			       struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+
+	dwmac->int_phyclk = of_property_read_bool(np, "st,int-phyclk");
+
+	/* Check if internal clk from RCC selected */
+	if (dwmac->int_phyclk) {
+		/*  Get ETH_CLK clocks */
+		dwmac->clk_eth_ck = devm_clk_get(dev, "eth-ck");
+		if (IS_ERR(dwmac->clk_eth_ck)) {
+			dev_err(dev, "No ETH CK clock provided...\n");
+			return PTR_ERR(dwmac->clk_eth_ck);
+		}
+	}
+
+	/*  Clock used for low power mode */
+	dwmac->clk_ethstp = devm_clk_get(dev, "ethstp");
+	if (IS_ERR(dwmac->clk_ethstp)) {
+		dev_err(dev, "No ETH peripheral clock provided for CStop mode ...\n");
+		return PTR_ERR(dwmac->clk_ethstp);
+	}
+
+	/*  Clock for sysconfig */
+	dwmac->syscfg_clk = devm_clk_get(dev, "syscfg-clk");
+	if (IS_ERR(dwmac->syscfg_clk)) {
+		dev_err(dev, "No syscfg clock provided...\n");
+		return PTR_ERR(dwmac->syscfg_clk);
+	}
+
+	return 0;
+}
+
 static int stm32_dwmac_probe(struct platform_device *pdev)
 {
 	struct plat_stmmacenet_data *plat_dat;
 	struct stmmac_resources stmmac_res;
 	struct stm32_dwmac *dwmac;
+	const struct stm32_ops *data;
 	int ret;
 
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
@@ -112,6 +288,16 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
 		goto err_remove_config_dt;
 	}
 
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data) {
+		dev_err(&pdev->dev, "no of match data provided\n");
+		ret = -EINVAL;
+		goto err_remove_config_dt;
+	}
+
+	dwmac->ops = data;
+	dwmac->dev = &pdev->dev;
+
 	ret = stm32_dwmac_parse_data(dwmac, &pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to parse OF data\n");
@@ -149,15 +335,48 @@ static int stm32_dwmac_remove(struct platform_device *pdev)
 	return ret;
 }
 
+static int stm32mp1_suspend(struct stm32_dwmac *dwmac)
+{
+	int ret = 0;
+
+	ret = clk_prepare_enable(dwmac->clk_ethstp);
+	if (ret)
+		return ret;
+
+	clk_disable_unprepare(dwmac->clk_tx);
+	clk_disable_unprepare(dwmac->syscfg_clk);
+	if (dwmac->int_phyclk)
+		clk_disable_unprepare(dwmac->clk_eth_ck);
+
+	return ret;
+}
+
+static void stm32mp1_resume(struct stm32_dwmac *dwmac)
+{
+	clk_disable_unprepare(dwmac->clk_ethstp);
+}
+
+static int stm32mcu_suspend(struct stm32_dwmac *dwmac)
+{
+	clk_disable_unprepare(dwmac->clk_tx);
+	clk_disable_unprepare(dwmac->clk_rx);
+
+	return 0;
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int stm32_dwmac_suspend(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct stm32_dwmac *dwmac = priv->plat->bsp_priv;
+
 	int ret;
 
 	ret = stmmac_suspend(dev);
-	stm32_dwmac_clk_disable(priv->plat->bsp_priv);
+
+	if (dwmac->ops->suspend)
+		ret = dwmac->ops->suspend(dwmac);
 
 	return ret;
 }
@@ -166,8 +385,12 @@ static int stm32_dwmac_resume(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct stm32_dwmac *dwmac = priv->plat->bsp_priv;
 	int ret;
 
+	if (dwmac->ops->resume)
+		dwmac->ops->resume(dwmac);
+
 	ret = stm32_dwmac_init(priv->plat);
 	if (ret)
 		return ret;
@@ -181,8 +404,24 @@ static int stm32_dwmac_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops,
 	stm32_dwmac_suspend, stm32_dwmac_resume);
 
+static struct stm32_ops stm32mcu_dwmac_data = {
+	.set_mode = stm32mcu_set_mode,
+	.suspend = stm32mcu_suspend,
+	.syscfg_eth_mask = SYSCFG_MCU_ETH_MASK
+};
+
+static struct stm32_ops stm32mp1_dwmac_data = {
+	.set_mode = stm32mp1_set_mode,
+	.clk_prepare = stm32mp1_clk_prepare,
+	.suspend = stm32mp1_suspend,
+	.resume = stm32mp1_resume,
+	.parse_data = stm32mp1_parse_data,
+	.syscfg_eth_mask = SYSCFG_MP1_ETH_MASK
+};
+
 static const struct of_device_id stm32_dwmac_match[] = {
-	{ .compatible = "st,stm32-dwmac"},
+	{ .compatible = "st,stm32-dwmac", .data = &stm32mcu_dwmac_data},
+	{ .compatible = "st,stm32mp1-dwmac", .data = &stm32mp1_dwmac_data},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, stm32_dwmac_match);
@@ -199,5 +438,6 @@ static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops,
 module_platform_driver(stm32_dwmac_driver);
 
 MODULE_AUTHOR("Alexandre Torgue <alexandre.torgue@gmail.com>");
-MODULE_DESCRIPTION("STMicroelectronics MCU DWMAC Specific Glue layer");
+MODULE_AUTHOR("Christophe Roullier <christophe.roullier@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 DWMAC Specific Glue layer");
 MODULE_LICENSE("GPL v2");
-- 
1.9.1

^ permalink raw reply related

* [PATCH V2 6/8] net: stmmac: add dwmac-4.20a compatible
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

Add support for the dwmac-4.20a IP version from Synopsys.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index ebd3e5f..6d141f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -472,7 +472,8 @@ struct plat_stmmacenet_data *
 	}
 
 	if (of_device_is_compatible(np, "snps,dwmac-4.00") ||
-	    of_device_is_compatible(np, "snps,dwmac-4.10a")) {
+	    of_device_is_compatible(np, "snps,dwmac-4.10a") ||
+	    of_device_is_compatible(np, "snps,dwmac-4.20a")) {
 		plat->has_gmac4 = 1;
 		plat->has_gmac = 0;
 		plat->pmt = 1;
-- 
1.9.1

^ permalink raw reply related

* [PATCH V2 5/8] ARM: dts: stm32: Add ethernet dwmac on stm32mp1
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

Add Ethernet support (Synopsys MAC IP 4.20a) on stm32mp1 SOC.
Enable features supported by the stmmac driver, such as TSO.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 arch/arm/boot/dts/stm32mp157c.dtsi | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index 86421ba..79def3b 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -179,5 +179,35 @@
 			clocks = <&rcc USART1_K>;
 			status = "disabled";
 		};
+
+		stmmac_axi_config_0: stmmac-axi-config {
+			snps,wr_osr_lmt = <0x7>;
+			snps,rd_osr_lmt = <0x7>;
+			snps,blen = <0 0 0 0 16 8 4>;
+		};
+
+		ethernet0: ethernet@5800a000 {
+			compatible = "st,stm32mp1-dwmac", "snps,dwmac-4.20a";
+			reg = <0x5800a000 0x2000>;
+			reg-names = "stmmaceth";
+			interrupts-extended = <&intc GIC_SPI 61 IRQ_TYPE_NONE>;
+			interrupt-names = "macirq";
+			clock-names = "stmmaceth",
+				      "mac-clk-tx",
+				      "mac-clk-rx",
+				      "ethstp",
+				      "syscfg-clk";
+			clocks = <&rcc ETHMAC>,
+				 <&rcc ETHTX>,
+				 <&rcc ETHRX>,
+				 <&rcc ETHSTP>,
+				 <&rcc SYSCFG>;
+			st,syscon = <&syscfg 0x4>;
+			snps,mixed-burst;
+			snps,pbl = <2>;
+			snps,axi-config = <&stmmac_axi_config_0>;
+			snps,tso;
+			status = "disabled";
+		};
 	};
 };
-- 
1.9.1

^ permalink raw reply related

* [PATCH V2 2/8] dt-bindings: stm32-dwmac: add support of MPU families
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

Add a description of the Ethernet binding properties for the MPU family.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 Documentation/devicetree/bindings/net/stm32-dwmac.txt | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
index 489dbcb..1341012 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
@@ -6,14 +6,28 @@ Please see stmmac.txt for the other unchanged properties.
 The device node has following properties.
 
 Required properties:
-- compatible:  Should be "st,stm32-dwmac" to select glue, and
+- compatible:  For MCU family should be "st,stm32-dwmac" to select glue, and
 	       "snps,dwmac-3.50a" to select IP version.
+	       For MPU family should be "st,stm32mp1-dwmac" to select
+	       glue, and "snps,dwmac-4.20a" to select IP version.
 - clocks: Must contain a phandle for each entry in clock-names.
 - clock-names: Should be "stmmaceth" for the host clock.
 	       Should be "mac-clk-tx" for the MAC TX clock.
 	       Should be "mac-clk-rx" for the MAC RX clock.
+	       For MPU family need to add also "ethstp" for power mode clock and,
+	                                       "syscfg-clk" for SYSCFG clock.
+- interrupt-names: Should contain a list of interrupt names corresponding to
+           the interrupts in the interrupts property, if available.
+		   Should be "macirq" for the main MAC IRQ
+		   Should be "eth_wake_irq" for the IT which wake up system
 - st,syscon : Should be phandle/offset pair. The phandle to the syscon node which
-	      encompases the glue register, and the offset of the control register.
+	       encompases the glue register, and the offset of the control register.
+
+Optional properties:
+- clock-names:     For MPU family "eth-ck" for PHY without quartz
+- st,int-phyclk (boolean) :  valid only where the PHY does not have a quartz and needs to be clocked
+	           by RCC
+
 Example:
 
 	ethernet@40028000 {
-- 
1.9.1

^ permalink raw reply related

* [PATCH V2 8/8] dt-bindings: stm32: add compatible for syscon
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

This patch describes syscon DT bindings.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 Documentation/devicetree/bindings/arm/stm32.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/stm32.txt b/Documentation/devicetree/bindings/arm/stm32.txt
index 6808ed9..06e3834 100644
--- a/Documentation/devicetree/bindings/arm/stm32.txt
+++ b/Documentation/devicetree/bindings/arm/stm32.txt
@@ -8,3 +8,7 @@ using one of the following compatible strings:
   st,stm32f746
   st,stm32h743
   st,stm32mp157
+
+Required nodes:
+- syscon: the soc bus node must have a system controller node pointing to the
+  global control registers, with the compatible string "syscon";
-- 
1.9.1

^ permalink raw reply related

* [PATCH V2 0/8] net: ethernet: stmmac: add support for stm32mp1
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier, andrew

Patches to add Ethernet support on stm32mp1.
Changelog:
Remark from Andrew Lunn
In drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
Add support of PHY_INTERFACE_MODE_RGMII_ID, PHY_INTERFACE_MODE_RGMII_RXID, 
PHY_INTERFACE_MODE_RGMII_TXID.

Remark from Rob Herring
In arch/arm/boot/dts/stm32mp157-pinctrl.dtsi:
Replace @0 with -0
In Documentation/devicetree/bindings/arm/stm32.txt and
Documentation/devicetree/bindings/net/stm32-dwmac.txt:
Update as requested by Rob
In arch/arm/boot/dts/stm32mp157c.dtsi:
Remove compatible "st,stm32-syscfg" and use only generic "syscon"

Christophe Roullier (8):
  net: ethernet: stmmac: add adaptation for stm32mp157c.
  dt-bindings: stm32-dwmac: add support of MPU families
  ARM: dts: stm32: add ethernet pins to stm32mp157c
  ARM: dts: stm32: Add syscfg on stm32mp1
  ARM: dts: stm32: Add ethernet dwmac on stm32mp1
  net: stmmac: add dwmac-4.20a compatible
  ARM: dts: stm32: add support of ethernet on stm32mp157c-ev1
  dt-bindings: stm32: add compatible for syscon

 Documentation/devicetree/bindings/arm/stm32.txt    |   4 +
 .../devicetree/bindings/net/stm32-dwmac.txt        |  18 +-
 arch/arm/boot/dts/stm32mp157-pinctrl.dtsi          |  46 ++++
 arch/arm/boot/dts/stm32mp157c-ev1.dts              |  20 ++
 arch/arm/boot/dts/stm32mp157c.dtsi                 |  35 +++
 drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c  | 270 +++++++++++++++++++--
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |   3 +-
 7 files changed, 378 insertions(+), 18 deletions(-)

-- 
1.9.1

^ permalink raw reply

* [PATCH] Revert "vhost: make msg padding explicit"
From: Michael S. Tsirkin @ 2018-05-02 14:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: Jason Wang, kvm, virtualization, netdev

This reverts commit 93c0d549c4c5a7382ad70de6b86610b7aae57406.

Unfortunately the padding will break 32 bit userspace.
Ouch. Need to add some compat code, revert for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/vhost.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 5a8ad06..c51f8e5 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -68,7 +68,6 @@ struct vhost_iotlb_msg {

 struct vhost_msg {
 	int type;
-	int padding0;
 	union {
 		struct vhost_iotlb_msg iotlb;
 		__u8 padding[64];
-- 
MST

^ permalink raw reply related

* Re: [PATCH net-next] net/mlx4_en: optimizes get_fixed_ipv6_csum()
From: Tariq Toukan @ 2018-05-02 14:18 UTC (permalink / raw)
  To: Saeed Mahameed, davem@davemloft.net, edumazet@google.com
  Cc: netdev@vger.kernel.org, eric.dumazet@gmail.com
In-Reply-To: <1524783416.1731.1.camel@mellanox.com>



On 27/04/2018 1:56 AM, Saeed Mahameed wrote:
> On Thu, 2018-04-19 at 08:49 -0700, Eric Dumazet wrote:
>> While trying to support CHECKSUM_COMPLETE for IPV6 fragments,
>> I had to experiments various hacks in get_fixed_ipv6_csum().
>> I must admit I could not find how to implement this :/
>>
>> However, get_fixed_ipv6_csum() does a lot of redundant operations,
>> calling csum_partial() twice.
>>
>> First csum_partial() computes the checksum of saddr and daddr,
>> put in @csum_pseudo_hdr. Undone later in the second csum_partial()
>> computed on whole ipv6 header.
>>
>> Then nexthdr is added once, added a second time, then substracted.
>>
>> payload_len is added once, then substracted.
>>
>> Really all this can be reduced to two add_csum(), to add back 6 bytes
>> that were removed by mlx4 when providing hw_checksum in RX
>> descriptor.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Saeed Mahameed <saeedm@mellanox.com>
>> Cc: Tariq Toukan <tariqt@mellanox.com>
>> ---
>> Note: This patch, like other mlx4 patches can definitely wait
>> Tariq approval, thanks !
>>
> 
> LGTM,
> 
> Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
> 

Acked-by: Tariq Toukan <tariqt@mellanox.com>

Thanks Eric.

>>   drivers/net/ethernet/mellanox/mlx4/en_rx.c | 21 ++++++++----------
>> ---
>>   1 file changed, 8 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
>> b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
>> index
>> 5c613c6663da51a4ae792eeb4d8956b54655786b..38c56fb6e5f5970f245dd56c38e
>> 1fc63a9349a07 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
>> @@ -593,30 +593,25 @@ static int get_fixed_ipv4_csum(__wsum
>> hw_checksum, struct sk_buff *skb,
>>   }
>>   
>>   #if IS_ENABLED(CONFIG_IPV6)
>> -/* In IPv6 packets, besides subtracting the pseudo header checksum,
>> - * we also compute/add the IP header checksum which
>> - * is not added by the HW.
>> +/* In IPv6 packets, hw_checksum lacks 6 bytes from IPv6 header:
>> + * 4 first bytes : priority, version, flow_lbl
>> + * and 2 additional bytes : nexthdr, hop_limit.
>>    */
>>   static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff
>> *skb,
>>   			       struct ipv6hdr *ipv6h)
>>   {
>>   	__u8 nexthdr = ipv6h->nexthdr;
>> -	__wsum csum_pseudo_hdr = 0;
>> +	__wsum temp;
>>   
>>   	if (unlikely(nexthdr == IPPROTO_FRAGMENT ||
>>   		     nexthdr == IPPROTO_HOPOPTS ||
>>   		     nexthdr == IPPROTO_SCTP))
>>   		return -1;
>> -	hw_checksum = csum_add(hw_checksum, (__force
>> __wsum)htons(nexthdr));
>>   
>> -	csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
>> -				       sizeof(ipv6h->saddr) +
>> sizeof(ipv6h->daddr), 0);
>> -	csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force
>> __wsum)ipv6h->payload_len);
>> -	csum_pseudo_hdr = csum_add(csum_pseudo_hdr,
>> -				   (__force __wsum)htons(nexthdr));
>> -
>> -	skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
>> -	skb->csum = csum_add(skb->csum, csum_partial(ipv6h,
>> sizeof(struct ipv6hdr), 0));
>> +	/* priority, version, flow_lbl */
>> +	temp = csum_add(hw_checksum, *(__wsum *)ipv6h);
>> +	/* nexthdr and hop_limit */
>> +	skb->csum = csum_add(temp, (__force __wsum)*(__be16
>> *)&ipv6h->nexthdr);
>>   	return 0;
>>   }
>>   #endif

^ permalink raw reply

* [PATCH V2 4/8] ARM: dts: stm32: Add syscfg on stm32mp1
From: Christophe Roullier @ 2018-05-02 14:18 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, andrew, christophe.roullier, linux-arm-kernel, netdev
In-Reply-To: <1525270723-18241-1-git-send-email-christophe.roullier@st.com>

System configuration controller is mainly used to manage
the compensation cell and other IOs and system related
settings.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 arch/arm/boot/dts/stm32mp157c.dtsi | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index bc3eddc..86421ba 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -167,6 +167,11 @@
 			#reset-cells = <1>;
 		};
 
+		syscfg: system-config@50020000 {
+			compatible = "syscon";
+			reg = <0x50020000 0x400>;
+		};
+
 		usart1: serial@5c000000 {
 			compatible = "st,stm32h7-uart";
 			reg = <0x5c000000 0x400>;
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH V2 net-next 5/6] macvlan/macvtap: Add support for SCTP checksum offload.
From: Michael S. Tsirkin @ 2018-05-02 14:17 UTC (permalink / raw)
  To: Vlad Yasevich
  Cc: virtio-dev, marcelo.leitner, nhorman, netdev, virtualization,
	linux-sctp, Vladislav Yasevich
In-Reply-To: <cd94e936-f24c-de0d-7254-069054e33268@redhat.com>

On Wed, May 02, 2018 at 10:00:14AM -0400, Vlad Yasevich wrote:
> On 05/02/2018 09:46 AM, Michael S. Tsirkin wrote:
> > On Wed, May 02, 2018 at 09:27:00AM -0400, Vlad Yasevich wrote:
> >> On 05/01/2018 11:24 PM, Michael S. Tsirkin wrote:
> >>> On Tue, May 01, 2018 at 10:07:38PM -0400, Vladislav Yasevich wrote:
> >>>> Since we now have support for software CRC32c offload, turn it on
> >>>> for macvlan and macvtap devices so that guests can take advantage
> >>>> of offload SCTP checksums to the host or host hardware.
> >>>>
> >>>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> >>>> ---
> >>>>  drivers/net/macvlan.c | 5 +++--
> >>>>  drivers/net/tap.c     | 8 +++++---
> >>>>  2 files changed, 8 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> >>>> index 725f4b4..646b730 100644
> >>>> --- a/drivers/net/macvlan.c
> >>>> +++ b/drivers/net/macvlan.c
> >>>> @@ -834,7 +834,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
> >>>>  
> >>>>  #define ALWAYS_ON_OFFLOADS \
> >>>>  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
> >>>> -	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
> >>>> +	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL | NETIF_F_SCTP_CRC)
> >>>>  
> >>>>  #define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
> >>>>  
> >>>> @@ -842,7 +842,8 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
> >>>>  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
> >>>>  	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
> >>>>  	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
> >>>> -	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
> >>>> +	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER | \
> >>>> +	 NETIF_F_SCTP_CRC)
> >>>>  
> >>>>  #define MACVLAN_STATE_MASK \
> >>>>  	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
> >>>> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> >>>> index 9b6cb78..2c8512b 100644
> >>>> --- a/drivers/net/tap.c
> >>>> +++ b/drivers/net/tap.c
> >>>> @@ -369,8 +369,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
> >>>>  		 *	  check, we either support them all or none.
> >>>>  		 */
> >>>>  		if (skb->ip_summed == CHECKSUM_PARTIAL &&
> >>>> -		    !(features & NETIF_F_CSUM_MASK) &&
> >>>> -		    skb_checksum_help(skb))
> >>>> +		    skb_csum_hwoffload_help(skb, features))
> >>>>  			goto drop;
> >>>>  		if (ptr_ring_produce(&q->ring, skb))
> >>>>  			goto drop;
> >>>> @@ -945,6 +944,9 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
> >>>>  		}
> >>>>  	}
> >>>>  
> >>>> +	if (arg & TUN_F_SCTP_CSUM)
> >>>> +		feature_mask |= NETIF_F_SCTP_CRC;
> >>>> +
> >>>
> >>> so this still affects TX, shouldn't this affect RX instead?
> >>
> >> There is no bit to set on the RX path just like there is no bit to set on the RX path
> >> for TUN_F_CSUM.
> >>
> >> We only invert TSO offloads, not checksum offloads as the comment below states.
> >> For checksum,  macvtap has to compute the checksum itself in tap_handle_frame() above.
> >> It uses tx feature bits to see if it needs to do the checksum.
> >>
> >> If you think we need another flag to macvtap to control RXCSUM, that would need to be
> >> separate and cover standard TCP checksum as well.
> >>
> >> -vlad
> > 
> > Confused. What is the meaning of TUN_F_SCTP_CSUM? I assume this is
> > a way for userspace to tell tun device: "I can handle
> > packets without SCTP checksum, pls send them my way".
> 
> Yes,  just as TUN_F_CSUM means that tun device can handle packets with
> partial tcp/udp checksum.
> 
> > 
> > Now what is the implication for macvtap? 
> 
> The implication is exactly the same as for TUN_F_CSUM.  If the
> flag is set on the macvtap device, the TX checksum feature is
> turned on.

I guess I will have to go back and re-read that code - I do not remember
what TUN_F_CSUM does by now.  Here is a quick question that might
help me:

Let's assume userspace does not set TUN_F_SCTP_CSUM and device
does not calculate the checksums either. I would expect that with
macvtap this then behaves exactly like now with no overhead.

> 
> > And why  are
> > you setting NETIF_F_SCTP_CRC which is a flag
> > that affects packets sent by guest to host?
> 
> Mainly it's because we are using just 1 flag to control checksum
> offloading and we need to be able to control both tx and rx paths.

Well that's not really the case I think. What we have is controls for tx
offloads for tun. That's TUN_F_CSUM.
there are no rx offloads - userspace can send what it wants.

These are supposed to translate to rx offloads for macvtap.
tx offloads shouldn't be affected at all.

Maybe that's not the case - as I said need to go back and check.
Will try to find the time in the next couple of days.

> What you are suggesting is that we either invert what TUN_F_CSUM
> is doing in macvtap case, or have another flag that lets us control
> TX and RX paths separately.
> 
> Either case, that would be separate work.
> -vlad

So assuming TUN_F_CSUM affects tx for macvtap do you
agree it's a bug? And should we add to it with
TUN_F_SCTP_CSUM doing the same?

> > 
> > 
> >>>
> >>>
> >>>>  	/* tun/tap driver inverts the usage for TSO offloads, where
> >>>>  	 * setting the TSO bit means that the userspace wants to
> >>>>  	 * accept TSO frames and turning it off means that user space
> >>>> @@ -1077,7 +1079,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
> >>>>  	case TUNSETOFFLOAD:
> >>>>  		/* let the user check for future flags */
> >>>>  		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
> >>>> -			    TUN_F_TSO_ECN | TUN_F_UFO))
> >>>> +			    TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_SCTP_CSUM))
> >>>>  			return -EINVAL;
> >>>>  
> >>>>  		rtnl_lock();
> >>>> -- 
> >>>> 2.9.5

^ permalink raw reply

* Re: [PATCH net-next] net: phy: broadcom: add support for BCM89610 PHY
From: Bhadram Varka @ 2018-05-02 14:16 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: f.fainelli, davem, netdev
In-Reply-To: <20180502132938.GB23318@lunn.ch>

On 5/2/2018 6:59 PM, Andrew Lunn wrote:

> On Wed, May 02, 2018 at 03:54:36PM +0530, Bhadram Varka wrote:
>> It adds support for BCM89610 (Single-Port 10/100/1000BASE-T)
>> transceiver which is used in P3310 Tegra186 platform.
>>
>> Signed-off-by: Bhadram Varka <vbhadram@nvidia.com>
>> ---
>>   drivers/net/phy/broadcom.c | 11 +++++++++++
>>   include/linux/brcmphy.h    |  1 +
>>   2 files changed, 12 insertions(+)
>>
>> diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
>> index 3bb6b66..1e4b6c2 100644
>> --- a/drivers/net/phy/broadcom.c
>> +++ b/drivers/net/phy/broadcom.c
>> @@ -720,6 +720,16 @@ static struct phy_driver broadcom_drivers[] = {
>>   	.get_strings	= bcm_phy_get_strings,
>>   	.get_stats	= bcm53xx_phy_get_stats,
>>   	.probe		= bcm53xx_phy_probe,
>> +}, {
>> +	.phy_id         = PHY_ID_BCM89610,
>> +	.phy_id_mask    = 0xfffffff0,
>> +	.name           = "Broadcom BCM89610",
>> +	.features       = PHY_GBIT_FEATURES |
>> +			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
> Hi Bhadram
>
> Why do you have SUPPORTED_Pause | SUPPORTED_Asym_Pause. No other PHY
> does.
Hi Andrew,

Thanks for the review. Yes - got it. I will push updated patch.

Thanks,
Bhadram.

-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------

^ permalink raw reply

* Re: [PATCH V2 net-next 1/6] virtio: Add support for SCTP checksum offloading
From: Marcelo Ricardo Leitner @ 2018-05-02 14:14 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Vladislav Yasevich, netdev, linux-sctp, virtualization,
	virtio-dev, jasowang, nhorman, Vladislav Yasevich
In-Reply-To: <20180502061520-mutt-send-email-mst@kernel.org>

On Wed, May 02, 2018 at 06:16:45AM +0300, Michael S. Tsirkin wrote:
> On Tue, May 01, 2018 at 10:07:34PM -0400, Vladislav Yasevich wrote:
> > To support SCTP checksum offloading, we need to add a new feature
> > to virtio_net, so we can negotiate support between the hypervisor
> > and the guest.
> > The HOST feature bit signifies offloading support for transmit and
> > enables device offload features.
> > The GUEST feature bit signifies offloading support of receive and
> > is currently only used by the driver in case of xdp.
> >
> > This patch also adds an additional virtio_net header flag which
> > mirrors the skb->csum_not_inet flag.  This flag is used to indicate
> > that this is an SCTP packet that needs its checksum computed by the
> > lower layer.  In this case, the lower layer is the host hypervisor or
> > possibly a HW NIC that supports CRC32c offload.
> >
> > In the case that GUEST feature bit is set, it will be possible to
> > receive a virtio_net header with this bit set, which will set the
> > corresponding skb bit.  SCTP protocol will be updated to correctly
> > deal with it.
> >
> > Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> > ---
> >  drivers/net/virtio_net.c        | 14 +++++++++++++-
> >  include/linux/virtio_net.h      |  6 ++++++
> >  include/uapi/linux/virtio_net.h |  5 +++++
> >  3 files changed, 24 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 7b187ec..34af280 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2148,6 +2148,8 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
> >
> >  	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
> >  		offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
> > +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_SCTP_CSUM))
> > +		offloads |= 1ULL << VIRTIO_NET_F_GUEST_SCTP_CSUM;
> >
> >  	return virtnet_set_guest_offloads(vi, offloads);
> >  }
> > @@ -2160,6 +2162,8 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
> >  		return 0;
> >  	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
> >  		offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
> > +	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_SCTP_CSUM))
> > +		offloads |= 1ULL << VIRTIO_NET_F_GUEST_SCTP_CSUM;
> >
> >  	return virtnet_set_guest_offloads(vi, offloads);
> >  }
> > @@ -2724,6 +2728,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> >  	/* Do we support "hardware" checksums? */
> >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> >  		/* This opens up the world of extra features. */
> > +
> >  		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >  		if (csum)
> >  			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> > @@ -2746,9 +2751,15 @@ static int virtnet_probe(struct virtio_device *vdev)
> >  			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
> >  		/* (!csum && gso) case will be fixed by register_netdev() */
> >  	}
> > +
> >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
> >  		dev->features |= NETIF_F_RXCSUM;
> >
> > +	if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_SCTP_CSUM)) {
> > +		dev->hw_features |= NETIF_F_SCTP_CRC;
> > +		dev->features |= NETIF_F_SCTP_CRC;
> > +	}
> > +
> >  	dev->vlan_features = dev->features;
> >
> >  	/* MTU range: 68 - 65535 */
> > @@ -2962,7 +2973,8 @@ static struct virtio_device_id id_table[] = {
> >  	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
> >  	VIRTIO_NET_F_CTRL_MAC_ADDR, \
> >  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
> > -	VIRTIO_NET_F_SPEED_DUPLEX
> > +	VIRTIO_NET_F_SPEED_DUPLEX, \
> > +	VIRTIO_NET_F_HOST_SCTP_CSUM, VIRTIO_NET_F_GUEST_SCTP_CSUM
> >
> >  static unsigned int features[] = {
> >  	VIRTNET_FEATURES,
> > diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> > index f144216..28fffdc 100644
> > --- a/include/linux/virtio_net.h
> > +++ b/include/linux/virtio_net.h
> > @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> >
> >  		if (!skb_partial_csum_set(skb, start, off))
> >  			return -EINVAL;
> > +
> > +		if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
> > +			skb->csum_not_inet = 1;
> >  	}
> >
> >  	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> > @@ -91,6 +94,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> >  				skb_checksum_start_offset(skb));
> >  		hdr->csum_offset = __cpu_to_virtio16(little_endian,
> >  				skb->csum_offset);
> > +
> > +		if (skb->csum_not_inet)
> > +			hdr->flags |= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
> >  	} else if (has_data_valid &&
> >  		   skb->ip_summed == CHECKSUM_UNNECESSARY) {
> >  		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
> > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> > index 5de6ed3..9dfca1a 100644
> > --- a/include/uapi/linux/virtio_net.h
> > +++ b/include/uapi/linux/virtio_net.h
> > @@ -57,6 +57,10 @@
> >  					 * Steering */
> >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> >
> > +#define VIRTIO_NET_F_GUEST_SCTP_CSUM  61 /* Guest handles SCTP pks w/ partial
> > +					  * csum */
> > +#define VIRTIO_NET_F_HOST_SCTP_CSUM   62 /* HOST handles SCTP pkts w/ partial
> > +					  * csum */
> >  #define VIRTIO_NET_F_SPEED_DUPLEX 63	/* Device set linkspeed and duplex */
> >
> >  #ifndef VIRTIO_NET_NO_LEGACY
> > @@ -101,6 +105,7 @@ struct virtio_net_config {
> >  struct virtio_net_hdr_v1 {
> >  #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
> >  #define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
> > +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET  4       /* Checksum is not inet */
>
> Both comment and name are not very informative.
> How about just saying CRC32c ?

csum_not_inet is following the nomenclature used in sk_buff. Initially
Davide had named the sk_buff field after crc32c but then it was argued
that maybe another check method may be introduced later and the name
would then be bogus, hence the name "csum_not_inet".

>
> >  	__u8 flags;
> >  #define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
> >  #define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
> > --
> > 2.9.5
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [PATCH] net: stmmac: Avoid VLA usage
From: Jose Abreu @ 2018-05-02 14:07 UTC (permalink / raw)
  To: Kees Cook, Jose Abreu
  Cc: Giuseppe Cavallaro, Alexandre Torgue, LKML, Network Development
In-Reply-To: <CAGXu5jKrpEOrUVng7zrq0Em9DsAwKfidaqa2AKGrz3ZSXXLn1g@mail.gmail.com>



On 02-05-2018 13:36, Kees Cook wrote:
> On Wed, May 2, 2018 at 1:54 AM, Jose Abreu <Jose.Abreu@synopsys.com> wrote:
>> Hi Kees,
>>
>> On 01-05-2018 22:01, Kees Cook wrote:
>>> In the quest to remove all stack VLAs from the kernel[1], this switches
>>> the "status" stack buffer to use the existing small (8) upper bound on
>>> how many queues can be checked for DMA, and adds a sanity-check just to
>>> make sure it doesn't operate under pathological conditions.
>>>
>>> [1] https://urldefense.proofpoint.com/v2/url?u=http-3A__lkml.kernel.org_r_CA-2B55aFzCG-2DzNmZwX4A2FQpadafLfEzK6CC-3DqPXydAacU1RqZWA-40mail.gmail.com&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=TBD6a7UY2VbpPmV9LOW_eHAyg8uPq1ZPDhq93VROTVE&s=4fvOST1HhWmZ4lThQe-dHCJYEXNOwey00BCXOWm8tKo&e=
>>>
>>> Signed-off-by: Kees Cook <keescook@chromium.org>
>>>
>> I would prefer the variable declarations in reverse-tree order,
>> but that's just a minor nitpick.
> I can explicitly reorder the other variables, if you want?

No need by me, unless Giuseppe or Alexandre prefer that. Thanks!

Best Regards,
Jose Miguel Abreu

>
>> Reviewed-by: Jose Abreu <joabreu@synopsys.com>
> Thanks!
>
>> PS: Is VLA warning switch in gcc already active? Because I didn't
>> see this warning in my builds.
> It is not. A bunch of people have been building with KCFLAGS=-Wvla to
> find the VLAs and sending patches. Once we get rid of them all, we can
> add the flag to the top-level Makefile.
>
> -Kees
>

^ permalink raw reply

* Re: [PATCH] vhost: make msg padding explicit
From: David Miller @ 2018-05-02 14:04 UTC (permalink / raw)
  To: mst; +Cc: kevin, kvm, netdev, linux-kernel, virtualization
In-Reply-To: <20180502162932-mutt-send-email-mst@kernel.org>

From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 2 May 2018 16:36:37 +0300

> Ouch.  True - and in particular the 32 bit ABI on 64 bit kernels doesn't
> work at all. Hmm. It's relatively new and maybe there aren't any 32 bit
> users yet. Thoughts?

If it's been in a released kernel version, we really aren't at liberty
to play "maybe nobody uses this" UAPI changing games.

Please send me a revert.

^ permalink raw reply

* Re: [PATCH V2 net-next 5/6] macvlan/macvtap: Add support for SCTP checksum offload.
From: Vlad Yasevich @ 2018-05-02 14:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Vladislav Yasevich, netdev, linux-sctp, virtualization,
	virtio-dev, jasowang, nhorman, marcelo.leitner
In-Reply-To: <20180502164317-mutt-send-email-mst@kernel.org>

On 05/02/2018 09:46 AM, Michael S. Tsirkin wrote:
> On Wed, May 02, 2018 at 09:27:00AM -0400, Vlad Yasevich wrote:
>> On 05/01/2018 11:24 PM, Michael S. Tsirkin wrote:
>>> On Tue, May 01, 2018 at 10:07:38PM -0400, Vladislav Yasevich wrote:
>>>> Since we now have support for software CRC32c offload, turn it on
>>>> for macvlan and macvtap devices so that guests can take advantage
>>>> of offload SCTP checksums to the host or host hardware.
>>>>
>>>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>>>> ---
>>>>  drivers/net/macvlan.c | 5 +++--
>>>>  drivers/net/tap.c     | 8 +++++---
>>>>  2 files changed, 8 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
>>>> index 725f4b4..646b730 100644
>>>> --- a/drivers/net/macvlan.c
>>>> +++ b/drivers/net/macvlan.c
>>>> @@ -834,7 +834,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
>>>>  
>>>>  #define ALWAYS_ON_OFFLOADS \
>>>>  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
>>>> -	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
>>>> +	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL | NETIF_F_SCTP_CRC)
>>>>  
>>>>  #define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
>>>>  
>>>> @@ -842,7 +842,8 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
>>>>  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
>>>>  	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
>>>>  	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
>>>> -	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
>>>> +	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER | \
>>>> +	 NETIF_F_SCTP_CRC)
>>>>  
>>>>  #define MACVLAN_STATE_MASK \
>>>>  	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
>>>> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
>>>> index 9b6cb78..2c8512b 100644
>>>> --- a/drivers/net/tap.c
>>>> +++ b/drivers/net/tap.c
>>>> @@ -369,8 +369,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
>>>>  		 *	  check, we either support them all or none.
>>>>  		 */
>>>>  		if (skb->ip_summed == CHECKSUM_PARTIAL &&
>>>> -		    !(features & NETIF_F_CSUM_MASK) &&
>>>> -		    skb_checksum_help(skb))
>>>> +		    skb_csum_hwoffload_help(skb, features))
>>>>  			goto drop;
>>>>  		if (ptr_ring_produce(&q->ring, skb))
>>>>  			goto drop;
>>>> @@ -945,6 +944,9 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
>>>>  		}
>>>>  	}
>>>>  
>>>> +	if (arg & TUN_F_SCTP_CSUM)
>>>> +		feature_mask |= NETIF_F_SCTP_CRC;
>>>> +
>>>
>>> so this still affects TX, shouldn't this affect RX instead?
>>
>> There is no bit to set on the RX path just like there is no bit to set on the RX path
>> for TUN_F_CSUM.
>>
>> We only invert TSO offloads, not checksum offloads as the comment below states.
>> For checksum,  macvtap has to compute the checksum itself in tap_handle_frame() above.
>> It uses tx feature bits to see if it needs to do the checksum.
>>
>> If you think we need another flag to macvtap to control RXCSUM, that would need to be
>> separate and cover standard TCP checksum as well.
>>
>> -vlad
> 
> Confused. What is the meaning of TUN_F_SCTP_CSUM? I assume this is
> a way for userspace to tell tun device: "I can handle
> packets without SCTP checksum, pls send them my way".

Yes,  just as TUN_F_CSUM means that tun device can handle packets with
partial tcp/udp checksum.

> 
> Now what is the implication for macvtap? 

The implication is exactly the same as for TUN_F_CSUM.  If the
flag is set on the macvtap device, the TX checksum feature is
turned on.

> And why  are
> you setting NETIF_F_SCTP_CRC which is a flag
> that affects packets sent by guest to host?

Mainly it's because we are using just 1 flag to control checksum
offloading and we need to be able to control both tx and rx paths.

What you are suggesting is that we either invert what TUN_F_CSUM
is doing in macvtap case, or have another flag that lets us control
TX and RX paths separately.

Either case, that would be separate work.
-vlad

> 
> 
>>>
>>>
>>>>  	/* tun/tap driver inverts the usage for TSO offloads, where
>>>>  	 * setting the TSO bit means that the userspace wants to
>>>>  	 * accept TSO frames and turning it off means that user space
>>>> @@ -1077,7 +1079,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
>>>>  	case TUNSETOFFLOAD:
>>>>  		/* let the user check for future flags */
>>>>  		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
>>>> -			    TUN_F_TSO_ECN | TUN_F_UFO))
>>>> +			    TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_SCTP_CSUM))
>>>>  			return -EINVAL;
>>>>  
>>>>  		rtnl_lock();
>>>> -- 
>>>> 2.9.5

^ permalink raw reply

* Re: [RFC v3 4/5] virtio_ring: add event idx support in packed ring
From: Michael S. Tsirkin @ 2018-05-02 13:51 UTC (permalink / raw)
  To: Tiwei Bie
  Cc: Jason Wang, virtualization, linux-kernel, netdev, wexu, jfreimann
In-Reply-To: <20180502072819.mf5l3dypk6dwx2s7@debian>

On Wed, May 02, 2018 at 03:28:19PM +0800, Tiwei Bie wrote:
> On Wed, May 02, 2018 at 10:51:06AM +0800, Jason Wang wrote:
> > On 2018年04月25日 13:15, Tiwei Bie wrote:
> > > This commit introduces the event idx support in packed
> > > ring. This feature is temporarily disabled, because the
> > > implementation in this patch may not work as expected,
> > > and some further discussions on the implementation are
> > > needed, e.g. do we have to check the wrap counter when
> > > checking whether a kick is needed?
> > > 
> > > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > > ---
> > >   drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
> > >   1 file changed, 49 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 0181e93897be..b1039c2985b9 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > >   static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > >   {
> > >   	struct vring_virtqueue *vq = to_vvq(_vq);
> > > -	u16 flags;
> > > +	u16 new, old, off_wrap, flags;
> > >   	bool needs_kick;
> > >   	u32 snapshot;
> > > @@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > >   	 * suppressions. */
> > >   	virtio_mb(vq->weak_barriers);
> > > +	old = vq->next_avail_idx - vq->num_added;
> > > +	new = vq->next_avail_idx;
> > > +	vq->num_added = 0;
> > > +
> > >   	snapshot = *(u32 *)vq->vring_packed.device;
> > > +	off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
> > >   	flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
> > >   #ifdef DEBUG
> > > @@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > >   	vq->last_add_time_valid = false;
> > >   #endif
> > > -	needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > > +	if (flags == VRING_EVENT_F_DESC)
> > > +		needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
> > 
> > I wonder whether or not the math is correct. Both new and event are in the
> > unit of descriptor ring size, but old looks not.
> 
> What vring_need_event() cares about is the distance between
> `new` and `old`, i.e. vq->num_added. So I think there
> is nothing wrong with `old`. But the calculation of the
> distance between `new` and `event_idx` isn't right when
> `new` wraps. What do you think about the code below:
> 
> 	wrap_counter = off_wrap >> 15;
> 	event_idx = off_wrap & ~(1<<15);
> 	if (wrap_counter != vq->wrap_counter)
> 		event_idx -= vq->vring_packed.num;
> 	
> 	needs_kick = vring_need_event(event_idx, new, old);

I suspect this hack won't work for a non-power-of-2 ring.


> Best regards,
> Tiwei Bie
> 
> 
> > 
> > Thanks
> > 
> > > +	else
> > > +		needs_kick = (flags != VRING_EVENT_F_DISABLE);
> > >   	END_USE(vq);
> > >   	return needs_kick;
> > >   }
> > > @@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> > >   	if (vq->last_used_idx >= vq->vring_packed.num)
> > >   		vq->last_used_idx -= vq->vring_packed.num;
> > > +	/* If we expect an interrupt for the next entry, tell host
> > > +	 * by writing event index and flush out the write before
> > > +	 * the read in the next get_buf call. */
> > > +	if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
> > > +		virtio_store_mb(vq->weak_barriers,
> > > +				&vq->vring_packed.driver->off_wrap,
> > > +				cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
> > > +						(vq->wrap_counter << 15)));
> > > +
> > >   #ifdef DEBUG
> > >   	vq->last_add_time_valid = false;
> > >   #endif
> > > @@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > >   	/* We optimistically turn back on interrupts, then check if there was
> > >   	 * more to do. */
> > > +	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > +	 * either clear the flags bit or point the event index at the next
> > > +	 * entry. Always update the event index to keep code simple. */
> > > +
> > > +	vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > +			vq->last_used_idx | (vq->wrap_counter << 15));
> > >   	if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > >   		virtio_wmb(vq->weak_barriers);
> > > -		vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > +		vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > +						     VRING_EVENT_F_ENABLE;
> > >   		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > >   							vq->event_flags_shadow);
> > >   	}
> > > @@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
> > >   static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> > >   {
> > >   	struct vring_virtqueue *vq = to_vvq(_vq);
> > > +	u16 bufs, used_idx, wrap_counter;
> > >   	START_USE(vq);
> > >   	/* We optimistically turn back on interrupts, then check if there was
> > >   	 * more to do. */
> > > +	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
> > > +	 * either clear the flags bit or point the event index at the next
> > > +	 * entry. Always update the event index to keep code simple. */
> > > +
> > > +	/* TODO: tune this threshold */
> > > +	bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
> > > +
> > > +	used_idx = vq->last_used_idx + bufs;
> > > +	wrap_counter = vq->wrap_counter;
> > > +
> > > +	if (used_idx >= vq->vring_packed.num) {
> > > +		used_idx -= vq->vring_packed.num;
> > > +		wrap_counter ^= 1;
> > > +	}
> > > +
> > > +	vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
> > > +			used_idx | (wrap_counter << 15));
> > >   	if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > >   		virtio_wmb(vq->weak_barriers);
> > > -		vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > > +		vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
> > > +						     VRING_EVENT_F_ENABLE;
> > >   		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > >   							vq->event_flags_shadow);
> > >   	}
> > > @@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
> > >   		switch (i) {
> > >   		case VIRTIO_RING_F_INDIRECT_DESC:
> > >   			break;
> > > +#if 0
> > >   		case VIRTIO_RING_F_EVENT_IDX:
> > >   			break;
> > > +#endif
> > >   		case VIRTIO_F_VERSION_1:
> > >   			break;
> > >   		case VIRTIO_F_IOMMU_PLATFORM:
> > 

^ permalink raw reply

* Re: [PATCH v2] net/mlx4_en: fix potential use-after-free with dma_unmap_page
From: Tariq Toukan @ 2018-05-02 13:50 UTC (permalink / raw)
  To: David Miller, srn; +Cc: yishaih, netdev
In-Reply-To: <20180427.194859.554972405986118921.davem@davemloft.net>



On 28/04/2018 2:48 AM, David Miller wrote:
> From: Sarah Newman <srn@prgmr.com>
> Date: Wed, 25 Apr 2018 21:00:34 -0700
> 
>> When swiotlb is in use, calling dma_unmap_page means that
>> the original page mapped with dma_map_page must still be valid
>> as swiotlb will copy data from its internal cache back to the
>> originally requested DMA location. When GRO is enabled,
>> all references to the original frag may be put before
>> mlx4_en_free_frag is called, meaning the page has been freed
>> before the call to dma_unmap_page in mlx4_en_free_frag.
>>
>> To fix, unmap the page as soon as possible.
>>
>> This can be trivially detected by doing the following:
>>
>> Compile the kernel with DEBUG_PAGEALLOC
>> Run the kernel as a Xen Dom0
>> Leave GRO enabled on the interface
>> Run a 10 second or more test with iperf over the interface.
>>
>> Signed-off-by: Sarah Newman <srn@prgmr.com>
> 
> Tariq, I assume I will get this from you in the next set of
> changes you submit to me.
> 
> Thanks.
> 

This patch fixes an issue that exists in old kernels. It is not relevant
to the latest code.

So I'm not sure about the process. After I review it, do I just submit 
it again for -stable?

Thanks.

^ permalink raw reply

* Re: [PATCH V2 net-next 5/6] macvlan/macvtap: Add support for SCTP checksum offload.
From: Michael S. Tsirkin @ 2018-05-02 13:46 UTC (permalink / raw)
  To: Vlad Yasevich
  Cc: Vladislav Yasevich, netdev, linux-sctp, virtualization,
	virtio-dev, jasowang, nhorman, marcelo.leitner
In-Reply-To: <35def050-3803-42f4-5ee3-c10f98186c6d@redhat.com>

On Wed, May 02, 2018 at 09:27:00AM -0400, Vlad Yasevich wrote:
> On 05/01/2018 11:24 PM, Michael S. Tsirkin wrote:
> > On Tue, May 01, 2018 at 10:07:38PM -0400, Vladislav Yasevich wrote:
> >> Since we now have support for software CRC32c offload, turn it on
> >> for macvlan and macvtap devices so that guests can take advantage
> >> of offload SCTP checksums to the host or host hardware.
> >>
> >> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> >> ---
> >>  drivers/net/macvlan.c | 5 +++--
> >>  drivers/net/tap.c     | 8 +++++---
> >>  2 files changed, 8 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> >> index 725f4b4..646b730 100644
> >> --- a/drivers/net/macvlan.c
> >> +++ b/drivers/net/macvlan.c
> >> @@ -834,7 +834,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
> >>  
> >>  #define ALWAYS_ON_OFFLOADS \
> >>  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
> >> -	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
> >> +	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL | NETIF_F_SCTP_CRC)
> >>  
> >>  #define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
> >>  
> >> @@ -842,7 +842,8 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
> >>  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
> >>  	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
> >>  	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
> >> -	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
> >> +	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER | \
> >> +	 NETIF_F_SCTP_CRC)
> >>  
> >>  #define MACVLAN_STATE_MASK \
> >>  	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
> >> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> >> index 9b6cb78..2c8512b 100644
> >> --- a/drivers/net/tap.c
> >> +++ b/drivers/net/tap.c
> >> @@ -369,8 +369,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
> >>  		 *	  check, we either support them all or none.
> >>  		 */
> >>  		if (skb->ip_summed == CHECKSUM_PARTIAL &&
> >> -		    !(features & NETIF_F_CSUM_MASK) &&
> >> -		    skb_checksum_help(skb))
> >> +		    skb_csum_hwoffload_help(skb, features))
> >>  			goto drop;
> >>  		if (ptr_ring_produce(&q->ring, skb))
> >>  			goto drop;
> >> @@ -945,6 +944,9 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
> >>  		}
> >>  	}
> >>  
> >> +	if (arg & TUN_F_SCTP_CSUM)
> >> +		feature_mask |= NETIF_F_SCTP_CRC;
> >> +
> > 
> > so this still affects TX, shouldn't this affect RX instead?
> 
> There is no bit to set on the RX path just like there is no bit to set on the RX path
> for TUN_F_CSUM.
> 
> We only invert TSO offloads, not checksum offloads as the comment below states.
> For checksum,  macvtap has to compute the checksum itself in tap_handle_frame() above.
> It uses tx feature bits to see if it needs to do the checksum.
> 
> If you think we need another flag to macvtap to control RXCSUM, that would need to be
> separate and cover standard TCP checksum as well.
> 
> -vlad

Confused. What is the meaning of TUN_F_SCTP_CSUM? I assume this is
a way for userspace to tell tun device: "I can handle
packets without SCTP checksum, pls send them my way".

Now what is the implication for macvtap? And why  are
you setting NETIF_F_SCTP_CRC which is a flag
that affects packets sent by guest to host?


> > 
> > 
> >>  	/* tun/tap driver inverts the usage for TSO offloads, where
> >>  	 * setting the TSO bit means that the userspace wants to
> >>  	 * accept TSO frames and turning it off means that user space
> >> @@ -1077,7 +1079,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
> >>  	case TUNSETOFFLOAD:
> >>  		/* let the user check for future flags */
> >>  		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
> >> -			    TUN_F_TSO_ECN | TUN_F_UFO))
> >> +			    TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_SCTP_CSUM))
> >>  			return -EINVAL;
> >>  
> >>  		rtnl_lock();
> >> -- 
> >> 2.9.5

^ permalink raw reply

* Re: [dm-devel] kvmalloc: always use vmalloc if CONFIG_DEBUG_VM
From: John Stoffel @ 2018-05-02 13:40 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Andrew Morton, eric.dumazet, mst, netdev, jasowang, linux-kernel,
	Matthew Wilcox, Michal Hocko, linux-mm, dm-devel, Mikulas Patocka,
	Babka, virtualization, David Miller, edumazet
In-Reply-To: <20180502133224.GA22123@redhat.com>

>>>>> "Mike" == Mike Snitzer <snitzer@redhat.com> writes:

Mike> On Tue, May 01 2018 at  8:36pm -0400,
Mike> Andrew Morton <akpm@linux-foundation.org> wrote:

>> On Tue, 24 Apr 2018 12:33:01 -0400 (EDT) Mikulas Patocka <mpatocka@redhat.com> wrote:
>> 
>> > 
>> > 
>> > On Tue, 24 Apr 2018, Michal Hocko wrote:
>> > 
>> > > On Tue 24-04-18 11:30:40, Mikulas Patocka wrote:
>> > > > 
>> > > > 
>> > > > On Tue, 24 Apr 2018, Michal Hocko wrote:
>> > > > 
>> > > > > On Mon 23-04-18 20:25:15, Mikulas Patocka wrote:
>> > > > > 
>> > > > > > Fixing __vmalloc code 
>> > > > > > is easy and it doesn't require cooperation with maintainers.
>> > > > > 
>> > > > > But it is a hack against the intention of the scope api.
>> > > > 
>> > > > It is not!
>> > > 
>> > > This discussion simply doesn't make much sense it seems. The scope API
>> > > is to document the scope of the reclaim recursion critical section. That
>> > > certainly is not a utility function like vmalloc.
>> > 
>> > That 15-line __vmalloc bugfix doesn't prevent you (or any other kernel 
>> > developer) from converting the code to the scope API. You make nonsensical 
>> > excuses.
>> > 
>> 
>> Fun thread!
>> 
>> Winding back to the original problem, I'd state it as
>> 
>> - Caller uses kvmalloc() but passes the address into vmalloc-naive
>> DMA API and
>> 
>> - Caller uses kvmalloc() but passes the address into kfree()
>> 
>> Yes?

Mike> I think so.

>> If so, then...
>> 
>> Is there a way in which, in the kvmalloc-called-kmalloc path, we can
>> tag the slab-allocated memory with a "this memory was allocated with
>> kvmalloc()" flag?  I *think* there's extra per-object storage available
>> with suitable slab/slub debugging options?  Perhaps we could steal one
>> bit from the redzone, dunno.
>> 
>> If so then we can
>> 
>> a) set that flag in kvmalloc() if the kmalloc() call succeeded
>> 
>> b) check for that flag in the DMA code, WARN if it is set.
>> 
>> c) in kvfree(), clear that flag before calling kfree()
>> 
>> d) in kfree(), check for that flag and go WARN() if set.
>> 
>> So both potential bugs are detected all the time, dependent upon
>> CONFIG_SLUB_DEBUG (and perhaps other slub config options).

Mike> Thanks Andrew, definitely the most sane proposal I've seen to resolve
Mike> this.

Cuts to the heart of the issue I think, and seems pretty sane.  Should
the WARN be rate limited as well?

John

^ permalink raw reply

* Re: [PATCH RFC iproute2-next 2/2] rdma: print provider resource attributes
From: Leon Romanovsky @ 2018-05-02 13:38 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Steve Wise, dsahern, netdev, linux-rdma
In-Reply-To: <20180430082524.530eff32@xeon-e3>

[-- Attachment #1: Type: text/plain, Size: 1629 bytes --]

On Mon, Apr 30, 2018 at 08:25:24AM -0700, Stephen Hemminger wrote:
> On Mon, 30 Apr 2018 07:36:18 -0700
> Steve Wise <swise@opengridcomputing.com> wrote:
>
> > +#define nla_type(attr) ((attr)->nla_type & NLA_TYPE_MASK)
> > +
> > +void newline(struct rd *rd)
> > +{
> > +	if (rd->json_output)
> > +		jsonw_end_array(rd->jw);
> > +	else
> > +		pr_out("\n");
> > +}
> > +
> > +void newline_indent(struct rd *rd)
> > +{
> > +	newline(rd);
> > +	if (!rd->json_output)
> > +		pr_out("    ");
> > +}
> > +
> > +static int print_provider_string(struct rd *rd, const char *key_str,
> > +				 const char *val_str)
> > +{
> > +	if (rd->json_output) {
> > +		jsonw_string_field(rd->jw, key_str, val_str);
> > +		return 0;
> > +	} else {
> > +		return pr_out("%s %s ", key_str, val_str);
> > +	}
> > +}
> > +
> > +static int print_provider_s32(struct rd *rd, const char *key_str, int32_t val,
> > +			      enum rdma_nldev_print_type print_type)
> > +{
> > +	if (rd->json_output) {
> > +		jsonw_int_field(rd->jw, key_str, val);
> > +		return 0;
> > +	}
> > +	switch (print_type) {
> > +	case RDMA_NLDEV_PRINT_TYPE_UNSPEC:
> > +		return pr_out("%s %d ", key_str, val);
> > +	case RDMA_NLDEV_PRINT_TYPE_HEX:
> > +		return pr_out("%s 0x%x ", key_str, val);
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +}
> > +
>
> This code should get converted to json_print library that handles the
> different output modes; rather than rolling its own equivalent functionality.

Can it be done after this patch is merged? It will simplify review and
testing because current code is implemented to be in the same format as
the rest of the tool.

Thanks

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: Page allocator bottleneck
From: Tariq Toukan @ 2018-05-02 13:38 UTC (permalink / raw)
  To: Aaron Lu, Tariq Toukan
  Cc: Linux Kernel Network Developers, linux-mm, Mel Gorman,
	David Miller, Jesper Dangaard Brouer, Eric Dumazet,
	Alexei Starovoitov, Saeed Mahameed, Eran Ben Elisha,
	Andrew Morton, Michal Hocko
In-Reply-To: <20180427084558.GB4009@intel.com>



On 27/04/2018 11:45 AM, Aaron Lu wrote:
> On Mon, Apr 23, 2018 at 09:10:33PM +0800, Aaron Lu wrote:
>> On Mon, Apr 23, 2018 at 11:54:57AM +0300, Tariq Toukan wrote:
>>> Hi,
>>>
>>> I ran my tests with your patches.
>>> Initial BW numbers are significantly higher than I documented back then in
>>> this mail-thread.
>>> For example, in driver #2 (see original mail thread), with 6 rings, I now
>>> get 92Gbps (slightly less than linerate) in comparison to 64Gbps back then.
>>>
>>> However, there were many kernel changes since then, I need to isolate your
>>> changes. I am not sure I can finish this today, but I will surely get to it
>>> next week after I'm back from vacation.
>>>
>>> Still, when I increase the scale (more rings, i.e. more cpus), I see that
>>> queued_spin_lock_slowpath gets to 60%+ cpu. Still high, but lower than it
>>> used to be.
>>
>> I wonder if it is on allocation path or free path?
> 
> Just FYI, I have pushed two more commits on top of the branch.
> They should improve free path zone lock contention for MIGRATE_UNMOVABLE
> pages (most kernel code allocates such pages); you may consider applying them if
> free path contention is a problem.
> 

Hi Aaron,
Thanks for the update, I did not analyze the contention yet.
I am back in office and will start testing soon.

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: John Stoffel @ 2018-05-02 13:38 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: John Stoffel, Andrew, dm-devel, eric.dumazet, mst, netdev,
	jasowang, Randy Dunlap, linux-kernel, Matthew Wilcox, Hocko,
	James Bottomley, Michal, edumazet, linux-mm, David Rientjes,
	Morton, virtualization, David Miller, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804301622480.4454@file01.intranet.prod.int.rdu2.redhat.com>

>>>>> "Mikulas" == Mikulas Patocka <mpatocka@redhat.com> writes:

Mikulas> On Mon, 30 Apr 2018, John Stoffel wrote:

>> >>>>> "Mikulas" == Mikulas Patocka <mpatocka@redhat.com> writes:
>> 
Mikulas> On Thu, 26 Apr 2018, John Stoffel wrote:
>> 
Mikulas> I see your point - and I think the misunderstanding is this.
>> 
>> Thanks.
>> 
Mikulas> This patch is not really helping people to debug existing crashes. It is 
Mikulas> not like "you get a crash" - "you google for some keywords" - "you get a 
Mikulas> page that suggests to turn this option on" - "you turn it on and solve the 
Mikulas> crash".
>> 
Mikulas> What this patch really does is that - it makes the kernel deliberately 
Mikulas> crash in a situation when the code violates the specification, but it 
Mikulas> would not crash otherwise or it would crash very rarely. It helps to 
Mikulas> detect specification violations.
>> 
Mikulas> If the kernel developer (or tester) doesn't use this option, his buggy 
Mikulas> code won't crash - and if it won't crash, he won't fix the bug or report 
Mikulas> it. How is the user or developer supposed to learn about this option, if 
Mikulas> he gets no crash at all?
>> 
>> So why do we make this a KConfig option at all?

Mikulas> Because other people see the KConfig option (so, they may enable it) and 
Mikulas> they don't see the kernel parameter (so, they won't enable it).

Mikulas> Close your eyes and say how many kernel parameters do you remember :-)

>> Just turn it on and let it rip.

Mikulas> I can't test if all the networking drivers use kvmalloc properly, because 
Mikulas> I don't have the hardware. You can't test it either. No one has all the
Mikulas> hardware that is supported by Linux.

Mikulas> Driver issues can only be tested by a mass of users. And if the users 
Mikulas> don't know about the debugging option, they won't enable it.

>> >> I agree with James here.  Looking at the SLAB vs SLUB Kconfig entries
>> >> tells me *nothing* about why I should pick one or the other, as an
>> >> example.

Mikulas> BTW. You can enable slub debugging either with CONFIG_SLUB_DEBUG_ON or 
Mikulas> with the kernel parameter "slub_debug" - and most users who compile their 
Mikulas> own kernel use CONFIG_SLUB_DEBUG_ON - just because it is visible.

You miss my point, which is that there's no explanation of what the
difference is between SLAB and SLUB and which I should choose.  The
same goes here.  If the KConfig option doesn't give useful info, it's
useless.

>> Now I also think that Linus has the right idea to not just sprinkle 
>> BUG_ONs into the code, just dump an oops and keep going if you can.
>> If it's a filesystem or a device, turn it read only so that people 
>> notice right away.

Mikulas> This vmalloc fallback is similar to
Mikulas> CONFIG_DEBUG_KOBJECT_RELEASE.  CONFIG_DEBUG_KOBJECT_RELEASE
Mikulas> changes the behavior of kobject_put in order to cause
Mikulas> deliberate crashes (that wouldn't happen otherwise) in
Mikulas> drivers that misuse kobject_put. In the same sense, we want
Mikulas> to cause deliberate crashes (that wouldn't happen otherwise)
Mikulas> in drivers that misuse kvmalloc.

Mikulas> The crashes will only happen in debugging kernels, not in
Mikulas> production kernels.

Says you.  What about people or distros that enable it
unconditionally?  They're going to get all kinds of reports and then
turn it off again.  Crashing the system isn't the answer here.  

^ permalink raw reply

* Re: [PATCH] vhost: make msg padding explicit
From: Michael S. Tsirkin @ 2018-05-02 13:36 UTC (permalink / raw)
  To: Kevin Easton
  Cc: David Miller, linux-kernel, jasowang, kvm, virtualization, netdev
In-Reply-To: <20180502062809.GA14485@la.guarana.org>

On Wed, May 02, 2018 at 02:28:09AM -0400, Kevin Easton wrote:
> On Tue, May 01, 2018 at 02:05:51PM -0400, David Miller wrote:
> > From: "Michael S. Tsirkin" <mst@redhat.com>
> > Date: Tue, 1 May 2018 20:19:19 +0300
> > 
> > > On Tue, May 01, 2018 at 11:28:22AM -0400, David Miller wrote:
> > >> From: "Michael S. Tsirkin" <mst@redhat.com>
> > >> Date: Fri, 27 Apr 2018 19:02:05 +0300
> > >> 
> > >> > There's a 32 bit hole just after type. It's best to
> > >> > give it a name, this way compiler is forced to initialize
> > >> > it with rest of the structure.
> > >> > 
> > >> > Reported-by: Kevin Easton <kevin@guarana.org>
> > >> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > >> 
> > >> Michael, will you be sending this directly to Linus or would you like
> > >> me to apply it to net or net-next?
> > >> 
> > >> Thanks.
> > > 
> > > I'd prefer you to apply it for net and cc stable if possible.
> > 
> > Ok, applied, and added to my -stable submission queue.
> 
> Hold on, this patch changes the layout for i386 (where there is
> no padding at all).  And it's part of UAPI.
> 
>     - Kevin
> 
> > 

Ouch.  True - and in particular the 32 bit ABI on 64 bit kernels doesn't
work at all. Hmm. It's relatively new and maybe there aren't any 32 bit
users yet. Thoughts?

-- 
MST

^ permalink raw reply

* Re: non-blocking connect for kernel SCTP sockets
From: Marcelo Ricardo Leitner @ 2018-05-02 13:36 UTC (permalink / raw)
  To: Michal Kubecek
  Cc: Xin Long, network dev, linux-sctp, LKML, Vlad Yasevich,
	Neil Horman, Gang He, GuoQing Jiang
In-Reply-To: <20180502123227.x7yucbi4lejj55o5@unicorn.suse.cz>

On Wed, May 02, 2018 at 02:32:28PM +0200, Michal Kubecek wrote:
> On Wed, May 02, 2018 at 05:46:23PM +0800, Xin Long wrote:
...
> > It is a bug, https://bugzilla.redhat.com/show_bug.cgi?id=1251530
>
> Not authorized. :-)

Oups! I just made it public.

  Marcelo

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox