Netdev List

Netdev List
 help / color / mirror / Atom feed

* RE: [PATCH v3] can: Convert to runtime_pm
From: Appana Durga Kedareswara Rao @ 2014-11-28  1:49 UTC (permalink / raw)
  To: Soren Brinkmann
  Cc: wg@grandegger.com, mkl@pengutronix.de, Michal Simek,
	grant.likely@linaro.org, robh+dt@kernel.org,
	devicetree@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-can@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20141127182337.GF17884@xsjandreislx>

Hi Soren,

-----Original Message-----
From: Sören Brinkmann [mailto:soren.brinkmann@xilinx.com]
Sent: Thursday, November 27, 2014 11:54 PM
To: Appana Durga Kedareswara Rao
Cc: wg@grandegger.com; mkl@pengutronix.de; Michal Simek; grant.likely@linaro.org; robh+dt@kernel.org; devicetree@vger.kernel.org; netdev@vger.kernel.org; linux-kernel@vger.kernel.org; linux-can@vger.kernel.org; Appana Durga Kedareswara Rao; linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v3] can: Convert to runtime_pm

Hi Kedar,

On Thu, 2014-11-27 at 06:38PM +0530, Kedareswara rao Appana wrote:
> Instead of enabling/disabling clocks at several locations in the
> driver, use the runtime_pm framework. This consolidates the actions
> for runtime PM in the appropriate callbacks and makes the driver more
> readable and maintainable.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> ---
> Changes for v3:
>   - Converted the driver to use runtime_pm.
> Changes for v2:
>   - Removed the struct platform_device* from suspend/resume
>     as suggested by Lothar.
>
>  drivers/net/can/xilinx_can.c |  119
> +++++++++++++++++++++++++----------------
>  1 files changed, 72 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/net/can/xilinx_can.c
> b/drivers/net/can/xilinx_can.c index 8a998e3..1be28ed 100644
> --- a/drivers/net/can/xilinx_can.c
> +++ b/drivers/net/can/xilinx_can.c
> @@ -32,6 +32,7 @@
>  #include <linux/can/dev.h>
>  #include <linux/can/error.h>
>  #include <linux/can/led.h>
> +#include <linux/pm_runtime.h>
>
>  #define DRIVER_NAME  "xilinx_can"
>
> @@ -138,7 +139,7 @@ struct xcan_priv {
>       u32 (*read_reg)(const struct xcan_priv *priv, enum xcan_reg reg);
>       void (*write_reg)(const struct xcan_priv *priv, enum xcan_reg reg,
>                       u32 val);
> -     struct net_device *dev;
> +     struct device *dev;
>       void __iomem *reg_base;
>       unsigned long irq_flags;
>       struct clk *bus_clk;
> @@ -842,6 +843,13 @@ static int xcan_open(struct net_device *ndev)
>       struct xcan_priv *priv = netdev_priv(ndev);
>       int ret;
>
> +     ret = pm_runtime_get_sync(priv->dev);
> +     if (ret < 0) {
> +             netdev_err(ndev, "%s: runtime CAN resume failed(%d)\n\r",

There might be other issues than the resume that make this fail. It should probably just say 'pm_runtime_get failed'.
The CAN in the string should not be needed, the netdev_err macro makes sure the device name is printed.
Can we have a space between 'failed' and the error code?
There should not be a '\r'

OK, sure, I will modify the error message as you explained above.

> +                             __func__, ret);
> +             return ret;
> +     }
> +
>       ret = request_irq(ndev->irq, xcan_interrupt, priv->irq_flags,
>                       ndev->name, ndev);
>       if (ret < 0) {
> @@ -849,29 +857,17 @@ static int xcan_open(struct net_device *ndev)
>               goto err;
>       }
>
> -     ret = clk_prepare_enable(priv->can_clk);
> -     if (ret) {
> -             netdev_err(ndev, "unable to enable device clock\n");
> -             goto err_irq;
> -     }
> -
> -     ret = clk_prepare_enable(priv->bus_clk);
> -     if (ret) {
> -             netdev_err(ndev, "unable to enable bus clock\n");
> -             goto err_can_clk;
> -     }
> -
>       /* Set chip into reset mode */
>       ret = set_reset_mode(ndev);
>       if (ret < 0) {
>               netdev_err(ndev, "mode resetting failed!\n");
> -             goto err_bus_clk;
> +             goto err_irq;
>       }
>
>       /* Common open */
>       ret = open_candev(ndev);
>       if (ret)
> -             goto err_bus_clk;
> +             goto err_irq;
>
>       ret = xcan_chip_start(ndev);
>       if (ret < 0) {
> @@ -887,13 +883,11 @@ static int xcan_open(struct net_device *ndev)
>
>  err_candev:
>       close_candev(ndev);
> -err_bus_clk:
> -     clk_disable_unprepare(priv->bus_clk);
> -err_can_clk:
> -     clk_disable_unprepare(priv->can_clk);
>  err_irq:
>       free_irq(ndev->irq, ndev);
>  err:
> +     pm_runtime_put(priv->dev);
> +
>       return ret;
>  }
>
> @@ -910,12 +904,11 @@ static int xcan_close(struct net_device *ndev)
>       netif_stop_queue(ndev);
>       napi_disable(&priv->napi);
>       xcan_chip_stop(ndev);
> -     clk_disable_unprepare(priv->bus_clk);
> -     clk_disable_unprepare(priv->can_clk);
>       free_irq(ndev->irq, ndev);
>       close_candev(ndev);
>
>       can_led_event(ndev, CAN_LED_EVENT_STOP);
> +     pm_runtime_put(priv->dev);
>
>       return 0;
>  }
> @@ -934,27 +927,21 @@ static int xcan_get_berr_counter(const struct net_device *ndev,
>       struct xcan_priv *priv = netdev_priv(ndev);
>       int ret;
>
> -     ret = clk_prepare_enable(priv->can_clk);
> -     if (ret)
> -             goto err;
> +     ret = pm_runtime_get_sync(priv->dev);
> +     if (ret < 0) {
> +             netdev_err(ndev, "%s: runtime resume failed(%d)\n\r",
> +                             __func__, ret);

As above.

OK, I will modify this too.

Regards,
Kedar.
        Sören


This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.


^ permalink raw reply

* [PATCH] wil6210: Fix potential memory leaks on error paths
From: Lino Sanfilippo @ 2014-11-28  1:47 UTC (permalink / raw)
  To: qca_vkondrat; +Cc: netdev, ahmedtamrawi, davem, Lino Sanfilippo

Fix missing memory deallocation on error paths in wil_write_file_wmi()
and wil_write_file_txmgmt().

Reported-by: Ahmed Tamrawi <ahmedtamrawi@gmail.com>
Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
---
 drivers/net/wireless/ath/wil6210/debugfs.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index 54a6ddc..4e6e145 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -573,8 +573,10 @@ static ssize_t wil_write_file_txmgmt(struct file *file, const char __user *buf,
 	if (!frame)
 		return -ENOMEM;
 
-	if (copy_from_user(frame, buf, len))
+	if (copy_from_user(frame, buf, len)) {
+		kfree(frame);
 		return -EIO;
+	}
 
 	params.buf = frame;
 	params.len = len;
@@ -614,8 +616,10 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf,
 		return -ENOMEM;
 
 	rc = simple_write_to_buffer(wmi, len, ppos, buf, len);
-	if (rc < 0)
+	if (rc < 0) {
+		kfree(wmi);
 		return rc;
+	}
 
 	cmd = &wmi[1];
 	cmdid = le16_to_cpu(wmi->id);
-- 
1.9.1

^ permalink raw reply related

* RE: [PATCH v3] can: Convert to runtime_pm
From: Appana Durga Kedareswara Rao @ 2014-11-28  1:47 UTC (permalink / raw)
  To: Michal Simek, wg@grandegger.com, mkl@pengutronix.de, Michal Simek,
	Soren Brinkmann, grant.likely@linaro.org, robh+dt@kernel.org
  Cc: linux-can@vger.kernel.org, netdev@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, devicetree@vger.kernel.org
In-Reply-To: <54773948.2070605@xilinx.com>

Hi Michal,

-----Original Message-----
From: Michal Simek [mailto:michal.simek@xilinx.com]
Sent: Thursday, November 27, 2014 8:17 PM
To: Appana Durga Kedareswara Rao; wg@grandegger.com; mkl@pengutronix.de; Michal Simek; Soren Brinkmann; grant.likely@linaro.org; robh+dt@kernel.org
Cc: linux-can@vger.kernel.org; netdev@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; devicetree@vger.kernel.org; Appana Durga Kedareswara Rao
Subject: Re: [PATCH v3] can: Convert to runtime_pm

On 11/27/2014 02:08 PM, Kedareswara rao Appana wrote:
> Instead of enabling/disabling clocks at several locations in the
> driver, use the runtime_pm framework. This consolidates the actions
> for runtime PM in the appropriate callbacks and makes the driver more
> readable and maintainable.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> ---
> Changes for v3:
>   - Converted the driver to use runtime_pm.
> Changes for v2:
>   - Removed the struct platform_device* from suspend/resume
>     as suggested by Lothar.
>
>  drivers/net/can/xilinx_can.c |  119
> +++++++++++++++++++++++++----------------
>  1 files changed, 72 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/net/can/xilinx_can.c
> b/drivers/net/can/xilinx_can.c index 8a998e3..1be28ed 100644
> --- a/drivers/net/can/xilinx_can.c
> +++ b/drivers/net/can/xilinx_can.c
> @@ -32,6 +32,7 @@
>  #include <linux/can/dev.h>
>  #include <linux/can/error.h>
>  #include <linux/can/led.h>
> +#include <linux/pm_runtime.h>
>
>  #define DRIVER_NAME  "xilinx_can"
>
> @@ -138,7 +139,7 @@ struct xcan_priv {
>       u32 (*read_reg)(const struct xcan_priv *priv, enum xcan_reg reg);
>       void (*write_reg)(const struct xcan_priv *priv, enum xcan_reg reg,
>                       u32 val);
> -     struct net_device *dev;
> +     struct device *dev;
>       void __iomem *reg_base;
>       unsigned long irq_flags;
>       struct clk *bus_clk;
> @@ -842,6 +843,13 @@ static int xcan_open(struct net_device *ndev)
>       struct xcan_priv *priv = netdev_priv(ndev);
>       int ret;
>
> +     ret = pm_runtime_get_sync(priv->dev);
> +     if (ret < 0) {
> +             netdev_err(ndev, "%s: runtime CAN resume failed(%d)\n\r",
> +                             __func__, ret);
> +             return ret;
> +     }
> +
>       ret = request_irq(ndev->irq, xcan_interrupt, priv->irq_flags,
>                       ndev->name, ndev);
>       if (ret < 0) {
> @@ -849,29 +857,17 @@ static int xcan_open(struct net_device *ndev)
>               goto err;
>       }
>
> -     ret = clk_prepare_enable(priv->can_clk);
> -     if (ret) {
> -             netdev_err(ndev, "unable to enable device clock\n");
> -             goto err_irq;
> -     }
> -
> -     ret = clk_prepare_enable(priv->bus_clk);
> -     if (ret) {
> -             netdev_err(ndev, "unable to enable bus clock\n");
> -             goto err_can_clk;
> -     }
> -
>       /* Set chip into reset mode */
>       ret = set_reset_mode(ndev);
>       if (ret < 0) {
>               netdev_err(ndev, "mode resetting failed!\n");
> -             goto err_bus_clk;
> +             goto err_irq;
>       }
>
>       /* Common open */
>       ret = open_candev(ndev);
>       if (ret)
> -             goto err_bus_clk;
> +             goto err_irq;
>
>       ret = xcan_chip_start(ndev);
>       if (ret < 0) {
> @@ -887,13 +883,11 @@ static int xcan_open(struct net_device *ndev)
>
>  err_candev:
>       close_candev(ndev);
> -err_bus_clk:
> -     clk_disable_unprepare(priv->bus_clk);
> -err_can_clk:
> -     clk_disable_unprepare(priv->can_clk);
>  err_irq:
>       free_irq(ndev->irq, ndev);
>  err:
> +     pm_runtime_put(priv->dev);
> +
>       return ret;
>  }
>
> @@ -910,12 +904,11 @@ static int xcan_close(struct net_device *ndev)
>       netif_stop_queue(ndev);
>       napi_disable(&priv->napi);
>       xcan_chip_stop(ndev);
> -     clk_disable_unprepare(priv->bus_clk);
> -     clk_disable_unprepare(priv->can_clk);
>       free_irq(ndev->irq, ndev);
>       close_candev(ndev);
>
>       can_led_event(ndev, CAN_LED_EVENT_STOP);
> +     pm_runtime_put(priv->dev);
>
>       return 0;
>  }
> @@ -934,27 +927,21 @@ static int xcan_get_berr_counter(const struct net_device *ndev,
>       struct xcan_priv *priv = netdev_priv(ndev);
>       int ret;
>
> -     ret = clk_prepare_enable(priv->can_clk);
> -     if (ret)
> -             goto err;
> +     ret = pm_runtime_get_sync(priv->dev);
> +     if (ret < 0) {
> +             netdev_err(ndev, "%s: runtime resume failed(%d)\n\r",
> +                             __func__, ret);
> +             return ret;
> +     }
>
> -     ret = clk_prepare_enable(priv->bus_clk);
> -     if (ret)
> -             goto err_clk;
>
>       bec->txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
>       bec->rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
>                       XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
>
> -     clk_disable_unprepare(priv->bus_clk);
> -     clk_disable_unprepare(priv->can_clk);
> +     pm_runtime_put(priv->dev);
>
>       return 0;
> -
> -err_clk:
> -     clk_disable_unprepare(priv->can_clk);
> -err:
> -     return ret;
>  }
>
>
> @@ -967,15 +954,45 @@ static const struct net_device_ops
> xcan_netdev_ops = {
>
>  /**
>   * xcan_suspend - Suspend method for the driver
> - * @dev:     Address of the platform_device structure
> + * @dev:     Address of the net_device structure

This is just the device structure.

OK, I will modify this in the next version of the patch.

>   *
>   * Put the driver into low power mode.
> - * Return: 0 always
> + * Return: 0 on success and failure value on error
>   */
>  static int __maybe_unused xcan_suspend(struct device *dev)  {
> -     struct platform_device *pdev = dev_get_drvdata(dev);
> -     struct net_device *ndev = platform_get_drvdata(pdev);
> +     if (!device_may_wakeup(dev))
> +             return pm_runtime_force_suspend(dev);
> +
> +     return 0;
> +}
> +
> +/**
> + * xcan_resume - Resume from suspend
> + * @dev:     Address of the net_device structure

ditto

OK, I will modify this in the next version of the patch.

> + *
> + * Resume operation after suspend.
> + * Return: 0 on success and failure value on error  */ static int
> +__maybe_unused xcan_resume(struct device *dev) {
> +     if (!device_may_wakeup(dev))
> +             return pm_runtime_force_resume(dev);
> +
> +     return 0;
> +
> +}
> +
> +/**
> + * xcan_runtime_suspend - Runtime suspend method for the driver
> + * @dev:     Address of the net_device structure

ditto.
OK, I will modify this in the next version of the patch.
> + *
> + * Put the driver into low power mode.
> + * Return: 0 always
> + */
> +static int __maybe_unused xcan_runtime_suspend(struct device *dev) {
> +     struct net_device *ndev = dev_get_drvdata(dev);
>       struct xcan_priv *priv = netdev_priv(ndev);
>
>       if (netif_running(ndev)) {
> @@ -993,16 +1010,15 @@ static int __maybe_unused xcan_suspend(struct
> device *dev)  }
>
>  /**
> - * xcan_resume - Resume from suspend
> - * @dev:     Address of the platformdevice structure
> + * xcan_runtime_resume - Runtime resume from suspend
> + * @dev:     Address of the net_device structure

ditto.
OK, I will modify this in the next version of the patch.

Regards,
Kedar.
Thanks,
Michal


This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.


^ permalink raw reply

* [PATCH v2 net] sh_eth: Fix sleeping function called from invalid context
From: Simon Horman @ 2014-11-28  1:04 UTC (permalink / raw)
  To: David S. Miller, netdev, linux-sh
  Cc: linux-arm-kernel, Magnus Damm, Sergei Shtylyov, Yoshihiro Kaneko

From: Mitsuhiro Kimura <mitsuhiro.kimura.kc@renesas.com>

This resolves the following bug which can be reproduced by building the
kernel with CONFIG_DEBUG_ATOMIC_SLEEP=y and reading network statistics
while the network interface is down.

e.g.:

ifconfig eth0 down
cat /sys/class/net/eth0/statistics/tx_errors

----
[ 1238.161349] BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:952
[ 1238.188279] in_atomic(): 1, irqs_disabled(): 0, pid: 1388, name: cat
[ 1238.207425] CPU: 0 PID: 1388 Comm: cat Not tainted 3.10.31-ltsi-00046-gefa0b46 #1087
[ 1238.230737] Backtrace:
[ 1238.238123] [<c0012e64>] (dump_backtrace+0x0/0x10c) from [<c0013000>] (show_stack+0x18/0x1c)
[ 1238.263499]  r6:000003b8 r5:c06160c0 r4:c0669e00 r3:00404000
[ 1238.280583] [<c0012fe8>] (show_stack+0x0/0x1c) from [<c04515a4>] (dump_stack+0x20/0x28)
[ 1238.304631] [<c0451584>] (dump_stack+0x0/0x28) from [<c004970c>] (__might_sleep+0xf8/0x118)
[ 1238.329734] [<c0049614>] (__might_sleep+0x0/0x118) from [<c02465ac>] (__pm_runtime_resume+0x38/0x90)
[ 1238.357170]  r7:d616f000 r6:c049c458 r5:00000004 r4:d6a17210
[ 1238.374251] [<c0246574>] (__pm_runtime_resume+0x0/0x90) from [<c029b1c4>] (sh_eth_get_stats+0x44/0x280)
[ 1238.402468]  r7:d616f000 r6:c049c458 r5:d5c21000 r4:d5c21000
[ 1238.419552] [<c029b180>] (sh_eth_get_stats+0x0/0x280) from [<c03ae39c>] (dev_get_stats+0x54/0x88)
[ 1238.446204]  r5:d5c21000 r4:d5ed7e08
[ 1238.456980] [<c03ae348>] (dev_get_stats+0x0/0x88) from [<c03c677c>] (netstat_show.isra.15+0x54/0x9c)
[ 1238.484413]  r6:d5c21000 r5:d5c21238 r4:00000028 r3:00000001
[ 1238.501495] [<c03c6728>] (netstat_show.isra.15+0x0/0x9c) from [<c03c69b8>] (show_tx_errors+0x18/0x1c)
[ 1238.529196]  r7:d5f945d8 r6:d5f945c0 r5:c049716c r4:c0650e7c
[ 1238.546279] [<c03c69a0>] (show_tx_errors+0x0/0x1c) from [<c023963c>] (dev_attr_show+0x24/0x50)
[ 1238.572157] [<c0239618>] (dev_attr_show+0x0/0x50) from [<c010c148>] (sysfs_read_file+0xb0/0x140)
[ 1238.598554]  r5:c049716c r4:d5c21240
[ 1238.609326] [<c010c098>] (sysfs_read_file+0x0/0x140) from [<c00b9ee4>] (vfs_read+0xb0/0x13c)
[ 1238.634679] [<c00b9e34>] (vfs_read+0x0/0x13c) from [<c00ba0ac>] (SyS_read+0x44/0x74)
[ 1238.657944]  r8:bef45bf0 r7:00000000 r6:d6ac0600 r5:00000000 r4:00000000
[ 1238.678172] [<c00ba068>] (SyS_read+0x0/0x74) from [<c000eec0>] (ret_fast_syscall+0x0/0x30)
----

Signed-off-by: Mitsuhiro Kimura <mitsuhiro.kimura.kc@renesas.com>
Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>

---
v2 [Simon Horman]
* Enhance changelog to describe how the problem may be reproduced.
* Use bitfield rather than boolean for is_opened field of
  struct sh_eth_private. As suggested by Sergei Shtylyov.
---
 drivers/net/ethernet/renesas/sh_eth.c | 64 +++++++++++++++++++----------------
 drivers/net/ethernet/renesas/sh_eth.h |  1 +
 2 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 60e9c2c..862a691 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2042,6 +2042,8 @@ static int sh_eth_open(struct net_device *ndev)
 	if (ret)
 		goto out_free_irq;
 
+	mdp->is_opened = 1;
+
 	return ret;
 
 out_free_irq:
@@ -2131,6 +2133,36 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	return NETDEV_TX_OK;
 }
 
+static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	if (sh_eth_is_rz_fast_ether(mdp))
+		return &ndev->stats;
+
+	if (!mdp->is_opened)
+		return &ndev->stats;
+
+	ndev->stats.tx_dropped += sh_eth_read(ndev, TROCR);
+	sh_eth_write(ndev, 0, TROCR);	/* (write clear) */
+	ndev->stats.collisions += sh_eth_read(ndev, CDCR);
+	sh_eth_write(ndev, 0, CDCR);	/* (write clear) */
+	ndev->stats.tx_carrier_errors += sh_eth_read(ndev, LCCR);
+	sh_eth_write(ndev, 0, LCCR);	/* (write clear) */
+
+	if (sh_eth_is_gether(mdp)) {
+		ndev->stats.tx_carrier_errors += sh_eth_read(ndev, CERCR);
+		sh_eth_write(ndev, 0, CERCR);	/* (write clear) */
+		ndev->stats.tx_carrier_errors += sh_eth_read(ndev, CEECR);
+		sh_eth_write(ndev, 0, CEECR);	/* (write clear) */
+	} else {
+		ndev->stats.tx_carrier_errors += sh_eth_read(ndev, CNDCR);
+		sh_eth_write(ndev, 0, CNDCR);	/* (write clear) */
+	}
+
+	return &ndev->stats;
+}
+
 /* device close function */
 static int sh_eth_close(struct net_device *ndev)
 {
@@ -2145,6 +2177,7 @@ static int sh_eth_close(struct net_device *ndev)
 	sh_eth_write(ndev, 0, EDTRR);
 	sh_eth_write(ndev, 0, EDRRR);
 
+	sh_eth_get_stats(ndev);
 	/* PHY Disconnect */
 	if (mdp->phydev) {
 		phy_stop(mdp->phydev);
@@ -2163,36 +2196,9 @@ static int sh_eth_close(struct net_device *ndev)
 
 	pm_runtime_put_sync(&mdp->pdev->dev);
 
-	return 0;
-}
-
-static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-
-	if (sh_eth_is_rz_fast_ether(mdp))
-		return &ndev->stats;
-
-	pm_runtime_get_sync(&mdp->pdev->dev);
-
-	ndev->stats.tx_dropped += sh_eth_read(ndev, TROCR);
-	sh_eth_write(ndev, 0, TROCR);	/* (write clear) */
-	ndev->stats.collisions += sh_eth_read(ndev, CDCR);
-	sh_eth_write(ndev, 0, CDCR);	/* (write clear) */
-	ndev->stats.tx_carrier_errors += sh_eth_read(ndev, LCCR);
-	sh_eth_write(ndev, 0, LCCR);	/* (write clear) */
-	if (sh_eth_is_gether(mdp)) {
-		ndev->stats.tx_carrier_errors += sh_eth_read(ndev, CERCR);
-		sh_eth_write(ndev, 0, CERCR);	/* (write clear) */
-		ndev->stats.tx_carrier_errors += sh_eth_read(ndev, CEECR);
-		sh_eth_write(ndev, 0, CEECR);	/* (write clear) */
-	} else {
-		ndev->stats.tx_carrier_errors += sh_eth_read(ndev, CNDCR);
-		sh_eth_write(ndev, 0, CNDCR);	/* (write clear) */
-	}
-	pm_runtime_put_sync(&mdp->pdev->dev);
+	mdp->is_opened = 0;
 
-	return &ndev->stats;
+	return 0;
 }
 
 /* ioctl to device function */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index b37c427..81fcab3 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -522,6 +522,7 @@ struct sh_eth_private {
 
 	unsigned no_ether_link:1;
 	unsigned ether_link_active_low:1;
+	unsigned is_opened:1;
 };
 
 static inline void sh_eth_soft_swap(char *src, int len)
-- 
2.1.3


^ permalink raw reply related

* Re: [PATCH rfc] packet: zerocopy packet_snd
From: Willem de Bruijn @ 2014-11-28  0:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jason Wang, Network Development, David Miller, Eric Dumazet,
	Daniel Borkmann
In-Reply-To: <20141127104445.GA8961@redhat.com>

On Thu, Nov 27, 2014 at 5:44 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Nov 27, 2014 at 09:18:12AM +0008, Jason Wang wrote:
>>
>>
>> On Thu, Nov 27, 2014 at 5:17 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> >On Wed, Nov 26, 2014 at 02:59:34PM -0500, Willem de Bruijn wrote:
>> >> > The main problem with zero copy ATM is with queueing disciplines
>> >> > which might keep the socket around essentially forever.
>> >> > The case was described here:
>> >> > https://lkml.org/lkml/2014/1/17/105
>> >> > and of course this will make it more serious now that
>> >> > more applications will be able to do this, so
>> >> > chances that an administrator enables this
>> >> > are higher.
>> >> The denial of service issue raised there, that a single queue can
>> >> block an entire virtio-net device, is less problematic in the case of
>> >> packet sockets. A socket can run out of sk_wmem_alloc, but a prudent
>> >> application can increase the limit or use separate sockets for
>> >> separate flows.
>> >
>> >Socket per flow? Maybe just use TCP then?  increasing the limit
>> >sounds like a wrong solution, it hurts security.
>> >
>> >> > One possible solution is some kind of timer orphaning frags
>> >> > for skbs that have been around for too long.
>> >>   Perhaps this can be approximated without an explicit timer by calling
>> >> skb_copy_ubufs on enqueue whenever qlen exceeds a threshold value?
>> >
>> >Hard to say. Will have to see that patch to judge how robust this is.
>>
>> This could not work, consider if the threshold is greater than vring size
>> or vhost_net pending limit, transmission may still be blocked.
>
>
> Well, application can e.g. just switch to non zero copy after
> reaching a specific number of requests.
> I think the real problem isn't reaching the queue full
> condition, it's the fact a specific buffer might never
> get freed. This API isn't half as useful as it could be
> if applications had a way to force the memory
> to be reclaimed.
>
>
> And actually, I see a way for applications to reclaim the memory:
> application could invoke something like MADV_SOFT_OFFLINE on the memory
> submitted for zero copy transmit, to invalidate PTEs, and make next
> access fault new pages in.
> If dedicated memory is used for packets, you could even use
> MADV_DONTNEED - but this doesn't work in many cases, certainly
> not for virtualization type workloads.

Why not?

>
>
> Playing with PTEs needs to invalidate the TLB so it is not fast,
> but it does not need to be: we are talking about ability to close the
> socket, which should be rare.
>
> For example, an application/hypervisor can detect a timeout when a
> packet is not transmitted within a predefined time period, and trigger
> such reclaim.
> Making this period shorter than network watchdog timer of the VM
> will ensure that watchdog does not trigger within VM.
> Alternatively, VM network watchdog could trigger this reclaim
> in order to recover packet memory.
>
> With this idea, if application merely reads memory, we incur a lot of
> overhead with pagefaults. So maybe a new call to enable COW for a range
> of pages would be a good idea.
>
>
> We'd have to make sure whatever's used for reclaim works for
> a wide range of memory types: mmap-ed file, hugetlbfs, anonymous memory.
>
>
> Thoughts?

Very nice. When exactly are these madvise hints preferable over munmap?

> --
> MST

^ permalink raw reply

* Re: [PATCH rfc] packet: zerocopy packet_snd
From: Willem de Bruijn @ 2014-11-28  0:32 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Network Development, David Miller, Eric Dumazet, Daniel Borkmann
In-Reply-To: <20141127072717.GA8644@redhat.com>

On Thu, Nov 27, 2014 at 2:27 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Nov 26, 2014 at 06:05:16PM -0500, Willem de Bruijn wrote:
>> On Wed, Nov 26, 2014 at 4:20 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> > On Wed, Nov 26, 2014 at 02:59:34PM -0500, Willem de Bruijn wrote:
>> >> > The main problem with zero copy ATM is with queueing disciplines
>> >> > which might keep the socket around essentially forever.
>> >> > The case was described here:
>> >> > https://lkml.org/lkml/2014/1/17/105
>> >> > and of course this will make it more serious now that
>> >> > more applications will be able to do this, so
>> >> > chances that an administrator enables this
>> >> > are higher.
>> >>
>> >> The denial of service issue raised there, that a single queue can
>> >> block an entire virtio-net device, is less problematic in the case of
>> >> packet sockets. A socket can run out of sk_wmem_alloc, but a prudent
>> >> application can increase the limit or use separate sockets for
>> >> separate flows.
>> >
>> > Sounds like this interface is very hard to use correctly.
>>
>> Actually, this socket alloc issue is the same for zerocopy and
>> non-zerocopy. Packets can be held in deep queues at which point
>> the packet socket is blocked. This is accepted behavior.
>>
>> >From the above thread:
>>
>> "It's ok for non-zerocopy packet to be blocked since VM1 thought the
>> packets has been sent instead of pending in the virtqueue. So VM1 can
>> still send packet to other destination."
>>
>> This is very specific to virtio and vhost-net. I don't think that that
>> concern applies to a packet interface.
>
> Well, you are obviously building the interface with some use-case in mind.
> Let's try to make it work for multiple use-cases.
>
> So at some level, you are right.  The issue is not running out of wmem.
> But I think I'm right too - this is hard to use correctly.
>
> I think the difference is that with your patch, application
> can't reuse the memory until packet is transmitted, otherwise junk goes
> out on the network.

The packet headers are in linear skb memory, so the integrity
of the kernel is not affected. But payload (and derived data,
like checksums) may be junk.

> Even closing the socket won't help.
> Is this true?

Good point. Indeed. Your approach in the follow up email,
to let the process release its mappings, sounds like a good
answer.

In general, I think that leaving this under process control is
preferable over kernel guarantees using a timer, because it
is more predictable from a process point of view. Both
the state of the system at any point in time, and the per-packet
cycle cost are easier to reason about (were packets sent out
with zero-copy, or were deep copies triggered by a timer? this
question would be difficult to answer otherwise, and it is
important, as deep copies are likely more expensive).

>
> I see this as a problem.
>
> I'm trying to figure out how would one use this interface, one obvious
> use would be to tunnel out raw packets directly from VM memory.
> For this application, a zero copy packet never completing is a problem:
> at minimum, you want to be able to remove the device, which
> translates to a requirement that closing the socket effectively stops
> using userspace memory.

The application can ensure this by releasing the relevant
pages. For mmap()ed pages, munmap() is sufficient. The approach
you mentioned with madvise may be more widely applicable. I had
so far only targeted mmapped (incl. MAP_HUGETLB) pages.

Alternatively, the kernel could enforce this, by keeping a reference
on all in-flight ubuf_info structs associated with the sk. Though this
adds non trivial accounting overhead and potentially massive
copying at socket close.

If outstanding ubuf_info are tracked, then the forced copy can
be triggered by other actions, as well, such as in response to a
sendmsg cmsg or setsockopt.

Because there is per-call cost associated with maintaining
kernel state, but not necessarily with userspace strategies,
I would choose to leave this responsibility to the user. That
is already the behavior with vmsplice gifting, I think.

> In case you want to be able to run e.g. a watchdog,
> ability to specify a deadline also seems beneficial.

>> Another issue, though, is that the method currently really only helps
>> TSO because all other paths cause a deep copy. There are more use
>> cases once it can send up to 64KB MTU over loopback or send out
>> GSO datagrams without triggering skb_copy_ubufs. I have not looked
>> into how (or if) that can be achieved yet.
>
> I think this was done intentionally at some point,
> try to look at git history to find out the reasons.
>
>> >
>> >> > One possible solution is some kind of timer orphaning frags
>> >> > for skbs that have been around for too long.
>> >>
>> >> Perhaps this can be approximated without an explicit timer by calling
>> >> skb_copy_ubufs on enqueue whenever qlen exceeds a threshold value?
>> >
>> > Not sure.  I'll have to see that patch to judge.

^ permalink raw reply

* Possible memory leak in function (wil_write_file_wmi) not freeing pointer (wmi) on error path
From: Ahmed Tamrawi @ 2014-11-27 23:46 UTC (permalink / raw)
  To: netdev

Bug Report: https://bugzilla.kernel.org/show_bug.cgi?id=88971
Linux Version [3.17-rc1]
Configuration: Default configuration for x86

Function (wil_write_file_wmi) in file
(drivers/net/wireless/ath/wil6210/debugfs.c) allocates pointer (wmi)
on line (536) and passes it as a parameter to function
(simple_write_to_buffer) at line (540). Function
(simple_write_to_buffer) returns on line (539) with a negative value.
This causes function (wil_write_file_wmi) to return on line (542)
without freeing pointer (wmi). Thus, causing a possible memory leak
not freeing pointer (wmi).

(wil_write_file_wmi) source code reference:
http://lxr.free-electrons.com/source/drivers/net/wireless/ath/wil6210/debugfs.c#L523
(simple_write_to_buffer) source code reference:
http://lxr.free-electrons.com/source/fs/libfs.c#L625

~Ahmed

^ permalink raw reply

* Re: [PATCH v3] can: Convert to runtime_pm
From: Sören Brinkmann @ 2014-11-27 23:46 UTC (permalink / raw)
  To: Marc Kleine-Budde
  Cc: Kedareswara rao Appana, wg, michal.simek, grant.likely, robh+dt,
	linux-can, netdev, linux-arm-kernel, linux-kernel, devicetree,
	Kedareswara rao Appana
In-Reply-To: <5477ABF3.1020605@pengutronix.de>

On Thu, 2014-11-27 at 11:55PM +0100, Marc Kleine-Budde wrote:
> On 11/27/2014 11:47 PM, Sören Brinkmann wrote:
> > On Thu, 2014-11-27 at 10:17PM +0100, Marc Kleine-Budde wrote:
> >> On 11/27/2014 02:08 PM, Kedareswara rao Appana wrote:
> >>> Instead of enabling/disabling clocks at several locations in the driver,
> >>> use the runtime_pm framework. This consolidates the actions for
> >>> runtime PM in the appropriate callbacks and makes the driver more
> >>> readable and maintainable.
> >>>
> >>> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
> >>> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> >>> ---
> >>> Changes for v3:
> >>>   - Converted the driver to use runtime_pm.
> >>> Changes for v2:
> >>>   - Removed the struct platform_device* from suspend/resume
> >>>     as suggest by Lothar.
> >>>
> >>>  drivers/net/can/xilinx_can.c |  119 +++++++++++++++++++++++++----------------
> >>>  1 files changed, 72 insertions(+), 47 deletions(-)
> >>>
> >>> diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
> >>> index 8a998e3..1be28ed 100644
> >>> --- a/drivers/net/can/xilinx_can.c
> >>> +++ b/drivers/net/can/xilinx_can.c
> > [...]
> >>> @@ -1030,7 +1046,10 @@ static int __maybe_unused xcan_resume(struct device *dev)
> >>>  	return 0;
> >>>  }
> >>>  
> >>> -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
> >>> +static const struct dev_pm_ops xcan_dev_pm_ops = {
> >>> +	SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume)
> >>> +	SET_PM_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL)
> >>> +};
> >>>  
> >>>  /**
> >>>   * xcan_probe - Platform registration call
> >>> @@ -1071,7 +1090,7 @@ static int xcan_probe(struct platform_device *pdev)
> >>>  		return -ENOMEM;
> >>>  
> >>>  	priv = netdev_priv(ndev);
> >>> -	priv->dev = ndev;
> >>> +	priv->dev = &pdev->dev;
> >>>  	priv->can.bittiming_const = &xcan_bittiming_const;
> >>>  	priv->can.do_set_mode = xcan_do_set_mode;
> >>>  	priv->can.do_get_berr_counter = xcan_get_berr_counter;
> >>> @@ -1137,6 +1156,11 @@ static int xcan_probe(struct platform_device *pdev)
> >>>  
> >>>  	netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
> >>>  
> >>> +	pm_runtime_set_active(&pdev->dev);
> >>> +	pm_runtime_irq_safe(&pdev->dev);
> >>
> >> You use just clock_enable()/disable() in the runtime functions, thus you
> >> can say they are irq_safe. On the other hand, the Zynq GPIO driver uses
> >> "full" prepare_enable/disable_unprepare calls. What's best practice here?
> > 
> > IIRC, the prepare/unprepare functions can sleep. xcan_get_berr_counter
> > is called from atomic context. So, I think we have to use the
> > disable/enable functions without the prepare/unprepare.
> > In the GPIO driver that problem does not exist.
> 
> IC, yes, correct.
> 
> This is why we introduced in other drivers a __get_berr_counter()
> function, that doesn't touch the clocks, which is used from within the
> driver (from the atomic contexts), while get_berr_counter() will fiddle
> with the clocks. This function is used for the
> priv->can.do_get_berr_counter callback.

I have the feeling I'm missing something. If I remove the 'must not
sleep' requirement from the runtime suspend/resume functions, I get
this:

BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:954
in_atomic(): 0, irqs_disabled(): 0, pid: 161, name: ip
INFO: lockdep is turned off.
CPU: 0 PID: 161 Comm: ip Not tainted 3.18.0-rc1-xilinx-00059-g21da26693b61-dirty #104
[<c00186a8>] (unwind_backtrace) from [<c00139f4>] (show_stack+0x20/0x24)
[<c00139f4>] (show_stack) from [<c055a41c>] (dump_stack+0x8c/0xd0)
[<c055a41c>] (dump_stack) from [<c0054808>] (__might_sleep+0x1ac/0x1e4)
[<c0054808>] (__might_sleep) from [<c034f8f0>] (__pm_runtime_resume+0x40/0x9c)
[<c034f8f0>] (__pm_runtime_resume) from [<c03b48d8>] (xcan_get_berr_counter+0x2c/0x9c)
[<c03b48d8>] (xcan_get_berr_counter) from [<c03b2ecc>] (can_fill_info+0x160/0x1f4)
[<c03b2ecc>] (can_fill_info) from [<c049f3b0>] (rtnl_fill_ifinfo+0x794/0x970)
[<c049f3b0>] (rtnl_fill_ifinfo) from [<c04a0048>] (rtnl_dump_ifinfo+0x1b4/0x2fc)
[<c04a0048>] (rtnl_dump_ifinfo) from [<c04af9c8>] (netlink_dump+0xe4/0x270)
[<c04af9c8>] (netlink_dump) from [<c04b0764>] (__netlink_dump_start+0xdc/0x170)
[<c04b0764>] (__netlink_dump_start) from [<c04a1fc4>] (rtnetlink_rcv_msg+0x154/0x1e0)
[<c04a1fc4>] (rtnetlink_rcv_msg) from [<c04b1e88>] (netlink_rcv_skb+0x68/0xc4)
[<c04b1e88>] (netlink_rcv_skb) from [<c04a045c>] (rtnetlink_rcv+0x28/0x34)
[<c04a045c>] (rtnetlink_rcv) from [<c04b1770>] (netlink_unicast+0x144/0x210)
[<c04b1770>] (netlink_unicast) from [<c04b1c9c>] (netlink_sendmsg+0x394/0x414)
[<c04b1c9c>] (netlink_sendmsg) from [<c046ffcc>] (sock_sendmsg+0x8c/0xc0)
[<c046ffcc>] (sock_sendmsg) from [<c04726bc>] (SyS_sendto+0xd8/0x114)
[<c04726bc>] (SyS_sendto) from [<c000f3e0>] (ret_fast_syscall+0x0/0x48)

I.e. the core calls this function from atomic context. And in an earlier
thread you said the core can also call this before/after calling the
open/close callbacks (which applies here too, I think).

I think the callback is required to
 - not sleep
 - get the device in a power state that allows querying its registers

So, I don't see how splitting the xcan_get_berr_counter callback helps
here, especially since it is not even used within the driver.

	Thanks,
	Sören

^ permalink raw reply

* Re: [PATCH v3] can: Convert to runtime_pm
From: Marc Kleine-Budde @ 2014-11-27 22:55 UTC (permalink / raw)
  To: Sören Brinkmann
  Cc: Kedareswara rao Appana, wg, michal.simek, grant.likely, robh+dt,
	linux-can, netdev, linux-arm-kernel, linux-kernel, devicetree,
	Kedareswara rao Appana
In-Reply-To: <820358d5369f465188dfa9306eae5658@BL2FFO11FD009.protection.gbl>

[-- Attachment #1: Type: text/plain, Size: 3275 bytes --]

On 11/27/2014 11:47 PM, Sören Brinkmann wrote:
> On Thu, 2014-11-27 at 10:17PM +0100, Marc Kleine-Budde wrote:
>> On 11/27/2014 02:08 PM, Kedareswara rao Appana wrote:
>>> Instead of enabling/disabling clocks at several locations in the driver,
>>> use the runtime_pm framework. This consolidates the actions for
>>> runtime PM in the appropriate callbacks and makes the driver more
>>> readable and maintainable.
>>>
>>> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
>>> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
>>> ---
>>> Changes for v3:
>>>   - Converted the driver to use runtime_pm.
>>> Changes for v2:
>>>   - Removed the struct platform_device* from suspend/resume
>>>     as suggest by Lothar.
>>>
>>>  drivers/net/can/xilinx_can.c |  119 +++++++++++++++++++++++++----------------
>>>  1 files changed, 72 insertions(+), 47 deletions(-)
>>>
>>> diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
>>> index 8a998e3..1be28ed 100644
>>> --- a/drivers/net/can/xilinx_can.c
>>> +++ b/drivers/net/can/xilinx_can.c
> [...]
>>> @@ -1030,7 +1046,10 @@ static int __maybe_unused xcan_resume(struct device *dev)
>>>  	return 0;
>>>  }
>>>  
>>> -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
>>> +static const struct dev_pm_ops xcan_dev_pm_ops = {
>>> +	SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume)
>>> +	SET_PM_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL)
>>> +};
>>>  
>>>  /**
>>>   * xcan_probe - Platform registration call
>>> @@ -1071,7 +1090,7 @@ static int xcan_probe(struct platform_device *pdev)
>>>  		return -ENOMEM;
>>>  
>>>  	priv = netdev_priv(ndev);
>>> -	priv->dev = ndev;
>>> +	priv->dev = &pdev->dev;
>>>  	priv->can.bittiming_const = &xcan_bittiming_const;
>>>  	priv->can.do_set_mode = xcan_do_set_mode;
>>>  	priv->can.do_get_berr_counter = xcan_get_berr_counter;
>>> @@ -1137,6 +1156,11 @@ static int xcan_probe(struct platform_device *pdev)
>>>  
>>>  	netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
>>>  
>>> +	pm_runtime_set_active(&pdev->dev);
>>> +	pm_runtime_irq_safe(&pdev->dev);
>>
>> You use just clock_enable()/disable() in the runtime functions, thus you
>> can say they are irq_safe. On the other hand, the Zynq GPIO driver uses
>> "full" prepare_enable/disable_unprepare calls. What's best practice here?
> 
> IIRC, the prepare/unprepare functions can sleep. xcan_get_berr_counter
> is called from atomic context. So, I think we have to use the
> disable/enable functions without the prepare/unprepare.
> In the GPIO driver that problem does not exist.

IC, yes, correct.

This is why we introduced in other drivers a __get_berr_counter()
function, that doesn't touch the clocks, which is used from within the
driver (from the atomic contexts), while get_berr_counter() will fiddle
with the clocks. This function is used for the
priv->can.do_get_berr_counter callback.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH v3] can: Convert to runtime_pm
From: Sören Brinkmann @ 2014-11-27 22:47 UTC (permalink / raw)
  To: Marc Kleine-Budde
  Cc: Kedareswara rao Appana, wg, michal.simek, grant.likely, robh+dt,
	linux-can, netdev, linux-arm-kernel, linux-kernel, devicetree,
	Kedareswara rao Appana
In-Reply-To: <54779501.8060009@pengutronix.de>

On Thu, 2014-11-27 at 10:17PM +0100, Marc Kleine-Budde wrote:
> On 11/27/2014 02:08 PM, Kedareswara rao Appana wrote:
> > Instead of enabling/disabling clocks at several locations in the driver,
> > use the runtime_pm framework. This consolidates the actions for
> > runtime PM in the appropriate callbacks and makes the driver more
> > readable and maintainable.
> > 
> > Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
> > Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> > ---
> > Changes for v3:
> >   - Converted the driver to use runtime_pm.
> > Changes for v2:
> >   - Removed the struct platform_device* from suspend/resume
> >     as suggest by Lothar.
> > 
> >  drivers/net/can/xilinx_can.c |  119 +++++++++++++++++++++++++----------------
> >  1 files changed, 72 insertions(+), 47 deletions(-)
> > 
> > diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
> > index 8a998e3..1be28ed 100644
> > --- a/drivers/net/can/xilinx_can.c
> > +++ b/drivers/net/can/xilinx_can.c
[...]
> > @@ -1030,7 +1046,10 @@ static int __maybe_unused xcan_resume(struct device *dev)
> >  	return 0;
> >  }
> >  
> > -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
> > +static const struct dev_pm_ops xcan_dev_pm_ops = {
> > +	SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume)
> > +	SET_PM_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL)
> > +};
> >  
> >  /**
> >   * xcan_probe - Platform registration call
> > @@ -1071,7 +1090,7 @@ static int xcan_probe(struct platform_device *pdev)
> >  		return -ENOMEM;
> >  
> >  	priv = netdev_priv(ndev);
> > -	priv->dev = ndev;
> > +	priv->dev = &pdev->dev;
> >  	priv->can.bittiming_const = &xcan_bittiming_const;
> >  	priv->can.do_set_mode = xcan_do_set_mode;
> >  	priv->can.do_get_berr_counter = xcan_get_berr_counter;
> > @@ -1137,6 +1156,11 @@ static int xcan_probe(struct platform_device *pdev)
> >  
> >  	netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
> >  
> > +	pm_runtime_set_active(&pdev->dev);
> > +	pm_runtime_irq_safe(&pdev->dev);
> 
> You use just clock_enable()/disable() in the runtime functions, thus you
> can say they are irq_safe. On the other hand, the Zynq GPIO driver uses
> "full" prepare_enable/disable_unprepare calls. What's best practice here?

IIRC, the prepare/unprepare functions can sleep. xcan_get_berr_counter
is called from atomic context. So, I think we have to use the
disable/enable functions without the prepare/unprepare.
In the GPIO driver that problem does not exist.

	Sören

^ permalink raw reply

* Re: [patch net-next v3 02/17] net: make vid as a parameter for ndo_fdb_add/ndo_fdb_del
From: Jiri Pirko @ 2014-11-27 21:55 UTC (permalink / raw)
  To: Scott Feldman
  Cc: Jamal Hadi Salim, John Fastabend, Netdev, David S. Miller,
	nhorman@tuxdriver.com, Andy Gospodarek, Thomas Graf,
	dborkman@redhat.com, ogerlitz@mellanox.com, jesse@nicira.com,
	pshelar@nicira.com, azhou@nicira.com, ben@decadent.org.uk,
	stephen@networkplumber.org, Kirsher, Jeffrey T,
	vyasevic@redhat.com, Cong Wang, Eric Dumazet, Florian Fainelli,
	Roopa Prabhu, John Linville
In-Reply-To: <CAE4R7bDzxo1xY-MHzqtQ70s+nf+3dkWhr7neV-atk1YkMP3KJw@mail.gmail.com>

Thu, Nov 27, 2014 at 09:59:37PM CET, sfeldma@gmail.com wrote:
>Ya right now the driver just doesn't call br_fdb_external_learn_add()
>if LEARNING_SYNC is not set.  It's a port driver setting so it seems
>fine to handle it in the port driver.  You could move the check up to
>br_fdb_external_learn_add(), but then you have an extra call every 1s
>for each fdb entry being refreshed.  (1s or whatever the refresh
>frequency is).  Easier to avoid this overhead and make the decision at
>the source.

I have been thinking about moving the check into bridge code, or to make
it there as well as in drivers. This is easily changeable on demand
later though, so I left this for now.

>
>-scott
>
>On Thu, Nov 27, 2014 at 2:14 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>> On 11/27/14 01:50, Scott Feldman wrote:
>>
>> [..]
>>
>>>
>>> It's there: IFLA_BRPORT_LEARNING_SYNC.  From iproute2:
>>>
>>> $ bridge -d link show dev swp1
>>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500
>>> master br0 state forwarding priority 32 cost 2
>>>      hairpin off guard off root_block off fastleave off learning off flood
>>> off
>>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 master
>>> br0
>>>      learning on learning_sync on hwmode swdev
>>>
>>> Turn it off:
>>>
>>> $ bridge link set dev swp1 hwmode swdev learning_sync off
>>>
>>> And now:
>>>
>>> $ bridge -d link show dev swp1
>>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500
>>> master br0 state forwarding priority 32 cost 2
>>>      hairpin off guard off root_block off fastleave off learning off flood
>>> off
>>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 master
>>> br0
>>>      learning on learning_sync off hwmode swdev
>>>
>>>
>>
>> Yes, this is the nice control portion.
>> From reviewing the patches, I didnt see how the core to the driver was
>> using the  learning_sync. IOW, how do i turn off the drivers sync
>> from being activated? Maybe you are doing this in the rocker patches
>> which i didnt review? i think this needs to be core infrastructure i.e
>> if you are doing this in a timer (as opposed to interrupt driven), then
>> the core sync timer would kick in and call some driver ops.
>> In any case, details that can be ironed out later..
>>
>> cheers,
>> jamal
>>

^ permalink raw reply

* Re: [PATCH v7 6/8] net: can: c_can: Disable pins when CAN interface is down
From: Marc Kleine-Budde @ 2014-11-27 21:19 UTC (permalink / raw)
  To: Linus Walleij, Roger Quadros
  Cc: wg, Wolfram Sang, Tony Lindgren, Thomas Gleixner, Mugunthan V N,
	George Cherian, Felipe Balbi, Sekhar Nori, Nishanth Menon,
	Sergei Shtylyov, Linux-OMAP, linux-can, netdev@vger.kernel.org
In-Reply-To: <CACRpkdbVXPi7fNjGEzChayQ1KQg8h2R6sm0TNRsVOFJmAT=YMQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1852 bytes --]

On 11/27/2014 02:26 PM, Linus Walleij wrote:
> On Fri, Nov 14, 2014 at 4:40 PM, Roger Quadros <rogerq@ti.com> wrote:
> 
>> DRA7 CAN IP suffers from a problem which causes it to be prevented
>> from fully turning OFF (i.e. stuck in transition) if the module was
>> disabled while there was traffic on the CAN_RX line.
>>
>> To work around this issue we select the SLEEP pin state by default
>> on probe and use the DEFAULT pin state on CAN up and back to the
>> SLEEP pin state on CAN down.
>>
>> Signed-off-by: Roger Quadros <rogerq@ti.com>
> 
> Reviewed-by: Linus Walleij <linus.walleij@linaro.org>

Thanks, however the patch is already upstream.

> 
> I see you figured it out all by yourselves :D
> 
> (Sorry for being absent.)
> 
>> +#include <linux/pinctrl/consumer.h>
>> +       pinctrl_pm_select_default_state(dev->dev.parent);
>> +       pinctrl_pm_select_sleep_state(dev->dev.parent);
>> +       pinctrl_pm_select_sleep_state(dev->dev.parent);
> 
> NB: in drivers/base/pinctrl.c:
> 
> #ifdef CONFIG_PM
>         /*
>          * If power management is enabled, we also look for the optional
>          * sleep and idle pin states, with semantics as defined in
>          * <linux/pinctrl/pinctrl-state.h>
>          */
>         dev->pins->sleep_state = pinctrl_lookup_state(dev->pins->p,
>                                         PINCTRL_STATE_SLEEP);
> 
> So if these states are necessary for the driver to work, put
> depends on PM or select PM in the Kconfig.

Roger, you can prepare a patch, if needed.

Thanks,
Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH v3] can: Convert to runtime_pm
From: Marc Kleine-Budde @ 2014-11-27 21:17 UTC (permalink / raw)
  To: Kedareswara rao Appana, wg, michal.simek, soren.brinkmann,
	grant.likely, robh+dt
  Cc: linux-can, netdev, linux-arm-kernel, linux-kernel, devicetree,
	Kedareswara rao Appana
In-Reply-To: <6b6db31d46074512a322eae6e4ef64d4@BN1BFFO11FD038.protection.gbl>

[-- Attachment #1: Type: text/plain, Size: 9724 bytes --]

On 11/27/2014 02:08 PM, Kedareswara rao Appana wrote:
> Instead of enabling/disabling clocks at several locations in the driver,
> use the runtime_pm framework. This consolidates the actions for
> runtime PM in the appropriate callbacks and makes the driver more
> readable and maintainable.
> 
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
> Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
> ---
> Changes for v3:
>   - Converted the driver to use runtime_pm.
> Changes for v2:
>   - Removed the struct platform_device* from suspend/resume
>     as suggest by Lothar.
> 
>  drivers/net/can/xilinx_can.c |  119 +++++++++++++++++++++++++----------------
>  1 files changed, 72 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
> index 8a998e3..1be28ed 100644
> --- a/drivers/net/can/xilinx_can.c
> +++ b/drivers/net/can/xilinx_can.c
> @@ -32,6 +32,7 @@
>  #include <linux/can/dev.h>
>  #include <linux/can/error.h>
>  #include <linux/can/led.h>
> +#include <linux/pm_runtime.h>
>  
>  #define DRIVER_NAME	"xilinx_can"
>  
> @@ -138,7 +139,7 @@ struct xcan_priv {
>  	u32 (*read_reg)(const struct xcan_priv *priv, enum xcan_reg reg);
>  	void (*write_reg)(const struct xcan_priv *priv, enum xcan_reg reg,
>  			u32 val);
> -	struct net_device *dev;
> +	struct device *dev;
>  	void __iomem *reg_base;
>  	unsigned long irq_flags;
>  	struct clk *bus_clk;
> @@ -842,6 +843,13 @@ static int xcan_open(struct net_device *ndev)
>  	struct xcan_priv *priv = netdev_priv(ndev);
>  	int ret;
>  
> +	ret = pm_runtime_get_sync(priv->dev);
> +	if (ret < 0) {
> +		netdev_err(ndev, "%s: runtime CAN resume failed(%d)\n\r",
> +				__func__, ret);
> +		return ret;
> +	}
> +
>  	ret = request_irq(ndev->irq, xcan_interrupt, priv->irq_flags,
>  			ndev->name, ndev);
>  	if (ret < 0) {
> @@ -849,29 +857,17 @@ static int xcan_open(struct net_device *ndev)
>  		goto err;
>  	}
>  
> -	ret = clk_prepare_enable(priv->can_clk);
> -	if (ret) {
> -		netdev_err(ndev, "unable to enable device clock\n");
> -		goto err_irq;
> -	}
> -
> -	ret = clk_prepare_enable(priv->bus_clk);
> -	if (ret) {
> -		netdev_err(ndev, "unable to enable bus clock\n");
> -		goto err_can_clk;
> -	}
> -
>  	/* Set chip into reset mode */
>  	ret = set_reset_mode(ndev);
>  	if (ret < 0) {
>  		netdev_err(ndev, "mode resetting failed!\n");
> -		goto err_bus_clk;
> +		goto err_irq;
>  	}
>  
>  	/* Common open */
>  	ret = open_candev(ndev);
>  	if (ret)
> -		goto err_bus_clk;
> +		goto err_irq;
>  
>  	ret = xcan_chip_start(ndev);
>  	if (ret < 0) {
> @@ -887,13 +883,11 @@ static int xcan_open(struct net_device *ndev)
>  
>  err_candev:
>  	close_candev(ndev);
> -err_bus_clk:
> -	clk_disable_unprepare(priv->bus_clk);
> -err_can_clk:
> -	clk_disable_unprepare(priv->can_clk);
>  err_irq:
>  	free_irq(ndev->irq, ndev);
>  err:
> +	pm_runtime_put(priv->dev);
> +
>  	return ret;
>  }
>  
> @@ -910,12 +904,11 @@ static int xcan_close(struct net_device *ndev)
>  	netif_stop_queue(ndev);
>  	napi_disable(&priv->napi);
>  	xcan_chip_stop(ndev);
> -	clk_disable_unprepare(priv->bus_clk);
> -	clk_disable_unprepare(priv->can_clk);
>  	free_irq(ndev->irq, ndev);
>  	close_candev(ndev);
>  
>  	can_led_event(ndev, CAN_LED_EVENT_STOP);
> +	pm_runtime_put(priv->dev);
>  
>  	return 0;
>  }
> @@ -934,27 +927,21 @@ static int xcan_get_berr_counter(const struct net_device *ndev,
>  	struct xcan_priv *priv = netdev_priv(ndev);
>  	int ret;
>  
> -	ret = clk_prepare_enable(priv->can_clk);
> -	if (ret)
> -		goto err;
> +	ret = pm_runtime_get_sync(priv->dev);
> +	if (ret < 0) {
> +		netdev_err(ndev, "%s: runtime resume failed(%d)\n\r",
> +				__func__, ret);
> +		return ret;
> +	}
>  
> -	ret = clk_prepare_enable(priv->bus_clk);
> -	if (ret)
> -		goto err_clk;
>  
>  	bec->txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK;
>  	bec->rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) &
>  			XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT);
>  
> -	clk_disable_unprepare(priv->bus_clk);
> -	clk_disable_unprepare(priv->can_clk);
> +	pm_runtime_put(priv->dev);
>  
>  	return 0;
> -
> -err_clk:
> -	clk_disable_unprepare(priv->can_clk);
> -err:
> -	return ret;
>  }
>  
>  
> @@ -967,15 +954,45 @@ static const struct net_device_ops xcan_netdev_ops = {
>  
>  /**
>   * xcan_suspend - Suspend method for the driver
> - * @dev:	Address of the platform_device structure
> + * @dev:	Address of the net_device structure
>   *
>   * Put the driver into low power mode.
> - * Return: 0 always
> + * Return: 0 on success and failure value on error
>   */
>  static int __maybe_unused xcan_suspend(struct device *dev)
>  {
> -	struct platform_device *pdev = dev_get_drvdata(dev);
> -	struct net_device *ndev = platform_get_drvdata(pdev);
> +	if (!device_may_wakeup(dev))
> +		return pm_runtime_force_suspend(dev);
> +
> +	return 0;
> +}
> +
> +/**
> + * xcan_resume - Resume from suspend
> + * @dev:	Address of the net_device structure
> + *
> + * Resume operation after suspend.
> + * Return: 0 on success and failure value on error
> + */
> +static int __maybe_unused xcan_resume(struct device *dev)
> +{
> +	if (!device_may_wakeup(dev))
> +		return pm_runtime_force_resume(dev);
> +
> +	return 0;
> +
> +}
> +
> +/**
> + * xcan_runtime_suspend - Runtime suspend method for the driver
> + * @dev:	Address of the net_device structure
> + *
> + * Put the driver into low power mode.
> + * Return: 0 always
> + */
> +static int __maybe_unused xcan_runtime_suspend(struct device *dev)
> +{
> +	struct net_device *ndev = dev_get_drvdata(dev);
>  	struct xcan_priv *priv = netdev_priv(ndev);
>  
>  	if (netif_running(ndev)) {
> @@ -993,16 +1010,15 @@ static int __maybe_unused xcan_suspend(struct device *dev)
>  }
>  
>  /**
> - * xcan_resume - Resume from suspend
> - * @dev:	Address of the platformdevice structure
> + * xcan_runtime_resume - Runtime resume from suspend
> + * @dev:	Address of the net_device structure
>   *
>   * Resume operation after suspend.
>   * Return: 0 on success and failure value on error
>   */
> -static int __maybe_unused xcan_resume(struct device *dev)
> +static int __maybe_unused xcan_runtime_resume(struct device *dev)
>  {
> -	struct platform_device *pdev = dev_get_drvdata(dev);
> -	struct net_device *ndev = platform_get_drvdata(pdev);
> +	struct net_device *ndev = dev_get_drvdata(dev);
>  	struct xcan_priv *priv = netdev_priv(ndev);
>  	int ret;

...adding the rest of the function to comment on it:

> 	ret = clk_enable(priv->bus_clk);
> 	if (ret) {
> 		dev_err(dev, "Cannot enable clock.\n");
> 		return ret;
> 	}
> 	ret = clk_enable(priv->can_clk);
> 	if (ret) {
> 		dev_err(dev, "Cannot enable clock.\n");
> 		clk_disable_unprepare(priv->bus_clk);
> 		return ret;
> 	}
> 
> 	priv->write_reg(priv, XCAN_MSR_OFFSET, 0);
> 	priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
> 	priv->can.state = CAN_STATE_ERROR_ACTIVE;

Does it have any side effects, when writing these values when the CAN
device is not UP? (e.g. when running the berr callback?) Setting its
state to ERROR active is probably wrong. Do you have to move it into the
if(netif_running())?

> 	if (netif_running(ndev)) {
> 		netif_device_attach(ndev);
> 		netif_start_queue(ndev);
> 	}
> 
> 	return 0;
> }

>  
> @@ -1030,7 +1046,10 @@ static int __maybe_unused xcan_resume(struct device *dev)
>  	return 0;
>  }
>  
> -static SIMPLE_DEV_PM_OPS(xcan_dev_pm_ops, xcan_suspend, xcan_resume);
> +static const struct dev_pm_ops xcan_dev_pm_ops = {
> +	SET_SYSTEM_SLEEP_PM_OPS(xcan_suspend, xcan_resume)
> +	SET_PM_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL)
> +};
>  
>  /**
>   * xcan_probe - Platform registration call
> @@ -1071,7 +1090,7 @@ static int xcan_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  
>  	priv = netdev_priv(ndev);
> -	priv->dev = ndev;
> +	priv->dev = &pdev->dev;
>  	priv->can.bittiming_const = &xcan_bittiming_const;
>  	priv->can.do_set_mode = xcan_do_set_mode;
>  	priv->can.do_get_berr_counter = xcan_get_berr_counter;
> @@ -1137,6 +1156,11 @@ static int xcan_probe(struct platform_device *pdev)
>  
>  	netif_napi_add(ndev, &priv->napi, xcan_rx_poll, rx_max);
>  
> +	pm_runtime_set_active(&pdev->dev);
> +	pm_runtime_irq_safe(&pdev->dev);

You use just clock_enable()/disable() in the runtime functions, thus you
can say they are irq_safe. On the other hand, the Zynq GPIO driver uses
"full" prepare_enable/disable_unprepare calls. What's best practice here?

> +	pm_runtime_enable(&pdev->dev);
> +	pm_runtime_get_sync(&pdev->dev);
> +
>  	ret = register_candev(ndev);
>  	if (ret) {
>  		dev_err(&pdev->dev, "fail to register failed (err=%d)\n", ret);

I think in case of an error, you have to call pm_runtime_put() here

> @@ -1144,8 +1168,9 @@ static int xcan_probe(struct platform_device *pdev)
>  	}
>  
>  	devm_can_led_init(ndev);
> -	clk_disable_unprepare(priv->bus_clk);
> -	clk_disable_unprepare(priv->can_clk);
> +
> +	pm_runtime_put(&pdev->dev);
> +
>  	netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n",
>  			priv->reg_base, ndev->irq, priv->can.clock.freq,
>  			priv->tx_max);
> 

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [patch net-next v4 21/21] rocker: Use logical operators on booleans
From: Scott Feldman @ 2014-11-27 21:04 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Netdev, David S. Miller, nhorman@tuxdriver.com, Andy Gospodarek,
	Thomas Graf, dborkman@redhat.com, ogerlitz@mellanox.com,
	jesse@nicira.com, pshelar@nicira.com, azhou@nicira.com,
	ben@decadent.org.uk, stephen@networkplumber.org,
	Kirsher, Jeffrey T, vyasevic@redhat.com, Cong Wang,
	Fastabend, John R, Eric Dumazet, Jamal Hadi Salim,
	Florian Fainelli, Roopa Prabhu, John Linville
In-Reply-To: <1417084826-9875-22-git-send-email-jiri@resnulli.us>

Signed-off-by: Scott Feldman <sfeldma@gmail.com>

On Thu, Nov 27, 2014 at 12:40 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> From: Thomas Graf <tgraf@suug.ch>
>
> Silences various sparse warnings
>
> Signed-off-by: Thomas Graf <tgraf@suug.ch>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---
> new in v4
> ---
>  drivers/net/ethernet/rocker/rocker.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
> index 30687bf..fded127 100644
> --- a/drivers/net/ethernet/rocker/rocker.c
> +++ b/drivers/net/ethernet/rocker/rocker.c
> @@ -2404,17 +2404,17 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port,
>         }
>
>         priority = ROCKER_PRIORITY_UNKNOWN;
> -       if (vlan_bridging & dflt & wild)
> +       if (vlan_bridging && dflt && wild)
>                 priority = ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD;
> -       else if (vlan_bridging & dflt & !wild)
> +       else if (vlan_bridging && dflt && !wild)
>                 priority = ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT;
> -       else if (vlan_bridging & !dflt)
> +       else if (vlan_bridging && !dflt)
>                 priority = ROCKER_PRIORITY_BRIDGING_VLAN;
> -       else if (!vlan_bridging & dflt & wild)
> +       else if (!vlan_bridging && dflt && wild)
>                 priority = ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_WILD;
> -       else if (!vlan_bridging & dflt & !wild)
> +       else if (!vlan_bridging && dflt && !wild)
>                 priority = ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_EXACT;
> -       else if (!vlan_bridging & !dflt)
> +       else if (!vlan_bridging && !dflt)
>                 priority = ROCKER_PRIORITY_BRIDGING_TENANT;
>
>         entry->key.priority = priority;
> @@ -3010,9 +3010,9 @@ static void rocker_port_fdb_learn_work(struct work_struct *work)
>         bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE);
>         bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED);
>
> -       if (learned & removing)
> +       if (learned && removing)
>                 br_fdb_external_learn_del(lw->dev, lw->addr, lw->vid);
> -       else if (learned & !removing)
> +       else if (learned && !removing)
>                 br_fdb_external_learn_add(lw->dev, lw->addr, lw->vid);
>
>         kfree(work);
> --
> 1.9.3
>

^ permalink raw reply

* Re: [patch net-next v4 20/21] rocker: Add proper validation of Netlink attributes
From: Scott Feldman @ 2014-11-27 21:04 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Netdev, David S. Miller, nhorman@tuxdriver.com, Andy Gospodarek,
	Thomas Graf, dborkman@redhat.com, ogerlitz@mellanox.com,
	jesse@nicira.com, pshelar@nicira.com, azhou@nicira.com,
	ben@decadent.org.uk, stephen@networkplumber.org,
	Kirsher, Jeffrey T, vyasevic@redhat.com, Cong Wang,
	Fastabend, John R, Eric Dumazet, Jamal Hadi Salim,
	Florian Fainelli, Roopa Prabhu, John Linville
In-Reply-To: <1417084826-9875-21-git-send-email-jiri@resnulli.us>

Signed-off-by: Scott Feldman <sfeldma@gmail.com>

On Thu, Nov 27, 2014 at 12:40 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> From: Thomas Graf <tgraf@suug.ch>
>
> Signed-off-by: Thomas Graf <tgraf@suug.ch>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---
> new in v4
> ---
>  drivers/net/ethernet/rocker/rocker.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
> index 61cfdbf..30687bf 100644
> --- a/drivers/net/ethernet/rocker/rocker.c
> +++ b/drivers/net/ethernet/rocker/rocker.c
> @@ -3712,6 +3712,9 @@ static int rocker_port_bridge_setlink(struct net_device *dev,
>         if (afspec) {
>                 attr = nla_find_nested(afspec, IFLA_BRIDGE_MODE);
>                 if (attr) {
> +                       if (nla_len(attr) < sizeof(mode))
> +                               return -EINVAL;
> +
>                         mode = nla_get_u16(attr);
>                         if (mode != BRIDGE_MODE_SWDEV)
>                                 return -EINVAL;
> @@ -3721,6 +3724,9 @@ static int rocker_port_bridge_setlink(struct net_device *dev,
>         if (protinfo) {
>                 attr = nla_find_nested(protinfo, IFLA_BRPORT_LEARNING);
>                 if (attr) {
> +                       if (nla_len(attr) < sizeof(u8))
> +                               return -EINVAL;
> +
>                         if (nla_get_u8(attr))
>                                 rocker_port->brport_flags |= BR_LEARNING;
>                         else
> @@ -3731,6 +3737,9 @@ static int rocker_port_bridge_setlink(struct net_device *dev,
>                 }
>                 attr = nla_find_nested(protinfo, IFLA_BRPORT_LEARNING_SYNC);
>                 if (attr) {
> +                       if (nla_len(attr) < sizeof(u8))
> +                               return -EINVAL;
> +
>                         if (nla_get_u8(attr))
>                                 rocker_port->brport_flags |= BR_LEARNING_SYNC;
>                         else
> --
> 1.9.3
>

^ permalink raw reply

* Re: [patch net-next v3 02/17] net: make vid as a parameter for ndo_fdb_add/ndo_fdb_del
From: Scott Feldman @ 2014-11-27 20:59 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: John Fastabend, Jiri Pirko, Netdev, David S. Miller,
	nhorman@tuxdriver.com, Andy Gospodarek, Thomas Graf,
	dborkman@redhat.com, ogerlitz@mellanox.com, jesse@nicira.com,
	pshelar@nicira.com, azhou@nicira.com, ben@decadent.org.uk,
	stephen@networkplumber.org, Kirsher, Jeffrey T,
	vyasevic@redhat.com, Cong Wang, Eric Dumazet, Florian Fainelli,
	Roopa Prabhu, John Linville
In-Reply-To: <54771598.80806@mojatatu.com>

Ya right now the driver just doesn't call br_fdb_external_learn_add()
if LEARNING_SYNC is not set.  It's a port driver setting so it seems
fine to handle it in the port driver.  You could move the check up to
br_fdb_external_learn_add(), but then you have an extra call every 1s
for each fdb entry being refreshed.  (1s or whatever the refresh
frequency is).  Easier to avoid this overhead and make the decision at
the source.

-scott

On Thu, Nov 27, 2014 at 2:14 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> On 11/27/14 01:50, Scott Feldman wrote:
>
> [..]
>
>>
>> It's there: IFLA_BRPORT_LEARNING_SYNC.  From iproute2:
>>
>> $ bridge -d link show dev swp1
>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500
>> master br0 state forwarding priority 32 cost 2
>>      hairpin off guard off root_block off fastleave off learning off flood
>> off
>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 master
>> br0
>>      learning on learning_sync on hwmode swdev
>>
>> Turn it off:
>>
>> $ bridge link set dev swp1 hwmode swdev learning_sync off
>>
>> And now:
>>
>> $ bridge -d link show dev swp1
>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500
>> master br0 state forwarding priority 32 cost 2
>>      hairpin off guard off root_block off fastleave off learning off flood
>> off
>> 2: swp1 state UNKNOWN : <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 master
>> br0
>>      learning on learning_sync off hwmode swdev
>>
>>
>
> Yes, this is the nice control portion.
> From reviewing the patches, I didnt see how the core to the driver was
> using the  learning_sync. IOW, how do i turn off the drivers sync
> from being activated? Maybe you are doing this in the rocker patches
> which i didnt review? i think this needs to be core infrastructure i.e
> if you are doing this in a timer (as opposed to interrupt driven), then
> the core sync timer would kick in and call some driver ops.
> In any case, details that can be ironed out later..
>
> cheers,
> jamal
>

^ permalink raw reply

* Re: [patch net-next v4 14/21] bridge: add brport flags to dflt bridge_getlink
From: Scott Feldman @ 2014-11-27 20:46 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Jiri Pirko, Netdev, David S. Miller, nhorman@tuxdriver.com,
	Andy Gospodarek, Thomas Graf, dborkman@redhat.com,
	ogerlitz@mellanox.com, jesse@nicira.com, pshelar@nicira.com,
	azhou@nicira.com, ben@decadent.org.uk, stephen@networkplumber.org,
	Kirsher, Jeffrey T, vyasevic@redhat.com, Cong Wang,
	Fastabend, John R, Eric Dumazet, Florian Fainelli, Roopa Prabhu,
	John Linville
In-Reply-To: <5477244C.2030507@mojatatu.com>

On Thu, Nov 27, 2014 at 3:17 AM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> On 11/27/14 05:40, Jiri Pirko wrote:
>>
>> From: Scott Feldman <sfeldma@gmail.com>
>>
>> To allow brport device to return current brport flags set on port.  Add
>> returned flags to nested IFLA_PROTINFO netlink msg built in dflt getlink.
>> With this change, netlink msg returned for bridge_getlink contains the
>> port's
>> offloaded flag settings (the port's SELF settings).
>
>
> Am i missing something or we already have this stuff showing up in user
> space today?

For RTM_GETLINK, rtnl_bridge_getlink() calls ndo_bridge_getlink twice
for each dev, once on bridge and second time on dev.  Each call adds
an RTM_NEWLINK to skb.  For the ndo_bridge_getlink() call to bridge,
the MASTER port flags are filled in using br_port_fill_attr().  For
the second ndo_bridge_getlink() call to dev, the port driver calls
ndo_dflt_bridge_getlink() which fills in the SELF port flags.  Before
this patch, ndo_dflt_bridge_getlink() was only filling in hwmode.

Whew, in any case, I think you'll agree this code needs a refactoring
down the road.  This change is just the bare minimum building on
what's there to get SELF port flags up to user-space.  A refactoring
effort should get the port drivers out of parsing/filling netlink msg
and leave that to the core code in rtnetlink.c.  That way we can have
one place for policy checks and one place for fill.  I think this
refactoring effort should be left out of this patch series; otherwise
this is going to drag on into the next year.

>
> cheers,
> jamal

^ permalink raw reply

* [PATCH v6 46/46] af_packet: virtio 1.0 stubs
From: Michael S. Tsirkin @ 2014-11-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	Daniel Borkmann, Atzm Watanabe, Hannes Frederic Sowa,
	Eric Dumazet, Tom Herbert, netdev
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

This merely fixes sparse warnings, without actually
adding support for the new APIs.

Still working out the best way to enable the new
functionality.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 net/packet/af_packet.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 87d20f4..d4a877e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2444,13 +2444,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 			goto out_unlock;
 
 		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
-		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
-		      vnet_hdr.hdr_len))
-			vnet_hdr.hdr_len = vnet_hdr.csum_start +
-						 vnet_hdr.csum_offset + 2;
+		    (__virtio16_to_cpu(false, vnet_hdr.csum_start) +
+		     __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2 >
+		      __virtio16_to_cpu(false, vnet_hdr.hdr_len)))
+			vnet_hdr.hdr_len = __cpu_to_virtio16(false,
+				 __virtio16_to_cpu(false, vnet_hdr.csum_start) +
+				__virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2);
 
 		err = -EINVAL;
-		if (vnet_hdr.hdr_len > len)
+		if (__virtio16_to_cpu(false, vnet_hdr.hdr_len) > len)
 			goto out_unlock;
 
 		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
@@ -2492,7 +2494,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	err = -ENOBUFS;
 	hlen = LL_RESERVED_SPACE(dev);
 	tlen = dev->needed_tailroom;
-	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
+	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len,
+			       __virtio16_to_cpu(false, vnet_hdr.hdr_len),
 			       msg->msg_flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto out_unlock;
@@ -2534,14 +2537,16 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (po->has_vnet_hdr) {
 		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
-						  vnet_hdr.csum_offset)) {
+			u16 s = __virtio16_to_cpu(false, vnet_hdr.csum_start);
+			u16 o = __virtio16_to_cpu(false, vnet_hdr.csum_offset);
+			if (!skb_partial_csum_set(skb, s, o)) {
 				err = -EINVAL;
 				goto out_free;
 			}
 		}
 
-		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
+		skb_shinfo(skb)->gso_size =
+			__virtio16_to_cpu(false, vnet_hdr.gso_size);
 		skb_shinfo(skb)->gso_type = gso_type;
 
 		/* Header must be checked, and gso_segs computed. */
@@ -2912,8 +2917,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 			struct skb_shared_info *sinfo = skb_shinfo(skb);
 
 			/* This is a hint as to how much should be linear. */
-			vnet_hdr.hdr_len = skb_headlen(skb);
-			vnet_hdr.gso_size = sinfo->gso_size;
+			vnet_hdr.hdr_len =
+				__cpu_to_virtio16(false, skb_headlen(skb));
+			vnet_hdr.gso_size =
+				__cpu_to_virtio16(false, sinfo->gso_size);
 			if (sinfo->gso_type & SKB_GSO_TCPV4)
 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 			else if (sinfo->gso_type & SKB_GSO_TCPV6)
@@ -2931,8 +2938,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
-			vnet_hdr.csum_offset = skb->csum_offset;
+			vnet_hdr.csum_start = __cpu_to_virtio16(false,
+					  skb_checksum_start_offset(skb));
+			vnet_hdr.csum_offset = __cpu_to_virtio16(false,
+							 skb->csum_offset);
 		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 			vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
 		} /* else everything is zero */
-- 
MST

^ permalink raw reply related

* [PATCH v6 45/46] vhost/scsi: partial virtio 1.0 support
From: Michael S. Tsirkin @ 2014-11-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	kvm, virtualization, netdev
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

Include all endian conversions as required by virtio 1.0.
Don't set virtio 1.0 yet, since that requires ANY_LAYOUT
which we don't yet support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 drivers/vhost/scsi.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index a17f118..01c01cb 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -168,6 +168,7 @@ enum {
 	VHOST_SCSI_VQ_IO = 2,
 };
 
+/* Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT. */
 enum {
 	VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) |
 					       (1ULL << VIRTIO_SCSI_F_T10_PI)
@@ -577,8 +578,8 @@ tcm_vhost_allocate_evt(struct vhost_scsi *vs,
 		return NULL;
 	}
 
-	evt->event.event = event;
-	evt->event.reason = reason;
+	evt->event.event = cpu_to_vhost32(vq, event);
+	evt->event.reason = cpu_to_vhost32(vq, reason);
 	vs->vs_events_nr++;
 
 	return evt;
@@ -636,7 +637,7 @@ again:
 	}
 
 	if (vs->vs_events_missed) {
-		event->event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+		event->event |= cpu_to_vhost32(vq, VIRTIO_SCSI_T_EVENTS_MISSED);
 		vs->vs_events_missed = false;
 	}
 
@@ -695,12 +696,13 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 			cmd, se_cmd->residual_count, se_cmd->scsi_status);
 
 		memset(&v_rsp, 0, sizeof(v_rsp));
-		v_rsp.resid = se_cmd->residual_count;
+		v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count);
 		/* TODO is status_qualifier field needed? */
 		v_rsp.status = se_cmd->scsi_status;
-		v_rsp.sense_len = se_cmd->scsi_sense_length;
+		v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
+						 se_cmd->scsi_sense_length);
 		memcpy(v_rsp.sense, cmd->tvc_sense_buf,
-		       v_rsp.sense_len);
+		       se_cmd->scsi_sense_length);
 		ret = copy_to_user(cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
 		if (likely(ret == 0)) {
 			struct vhost_scsi_virtqueue *q;
@@ -1095,14 +1097,14 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 						", but wrong data_direction\n");
 					goto err_cmd;
 				}
-				prot_bytes = v_req_pi.pi_bytesout;
+				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
 			} else if (v_req_pi.pi_bytesin) {
 				if (data_direction != DMA_FROM_DEVICE) {
 					vq_err(vq, "Received non zero di_pi_niov"
 						", but wrong data_direction\n");
 					goto err_cmd;
 				}
-				prot_bytes = v_req_pi.pi_bytesin;
+				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
 			}
 			if (prot_bytes) {
 				int tmp = 0;
@@ -1117,12 +1119,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 				data_first += prot_niov;
 				data_niov = data_num - prot_niov;
 			}
-			tag = v_req_pi.tag;
+			tag = vhost64_to_cpu(vq, v_req_pi.tag);
 			task_attr = v_req_pi.task_attr;
 			cdb = &v_req_pi.cdb[0];
 			lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF;
 		} else {
-			tag = v_req.tag;
+			tag = vhost64_to_cpu(vq, v_req.tag);
 			task_attr = v_req.task_attr;
 			cdb = &v_req.cdb[0];
 			lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
-- 
MST

^ permalink raw reply related

* [PATCH v6 41/46] macvtap: TUN_VNET_HDR support
From: Michael S. Tsirkin @ 2014-11-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	Jason Wang, Vlad Yasevich, Zhi Yong Wu, Tom Herbert,
	Ben Hutchings, netdev
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/macvtap.c | 68 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 880cc09..af90ab5 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -45,6 +45,18 @@ struct macvtap_queue {
 	struct list_head next;
 };
 
+#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_VNET_LE | IFF_MULTI_QUEUE)
+
+static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
+{
+	return __virtio16_to_cpu(q->flags & IFF_VNET_LE, val);
+}
+
+static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val)
+{
+	return __cpu_to_virtio16(q->flags & IFF_VNET_LE, val);
+}
+
 static struct proto macvtap_proto = {
 	.name = "macvtap",
 	.owner = THIS_MODULE,
@@ -557,7 +569,8 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
  * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
  * be shared with the tun/tap driver.
  */
-static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
+static int macvtap_skb_from_vnet_hdr(struct macvtap_queue *q,
+				     struct sk_buff *skb,
 				     struct virtio_net_hdr *vnet_hdr)
 {
 	unsigned short gso_type = 0;
@@ -588,13 +601,13 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
 	}
 
 	if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-		if (!skb_partial_csum_set(skb, vnet_hdr->csum_start,
-					  vnet_hdr->csum_offset))
+		if (!skb_partial_csum_set(skb, macvtap16_to_cpu(q, vnet_hdr->csum_start),
+					  macvtap16_to_cpu(q, vnet_hdr->csum_offset)))
 			return -EINVAL;
 	}
 
 	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
+		skb_shinfo(skb)->gso_size = macvtap16_to_cpu(q, vnet_hdr->gso_size);
 		skb_shinfo(skb)->gso_type = gso_type;
 
 		/* Header must be checked, and gso_segs computed. */
@@ -604,8 +617,9 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
 	return 0;
 }
 
-static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
-				   struct virtio_net_hdr *vnet_hdr)
+static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q,
+				    const struct sk_buff *skb,
+				    struct virtio_net_hdr *vnet_hdr)
 {
 	memset(vnet_hdr, 0, sizeof(*vnet_hdr));
 
@@ -613,8 +627,8 @@ static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 		struct skb_shared_info *sinfo = skb_shinfo(skb);
 
 		/* This is a hint as to how much should be linear. */
-		vnet_hdr->hdr_len = skb_headlen(skb);
-		vnet_hdr->gso_size = sinfo->gso_size;
+		vnet_hdr->hdr_len = cpu_to_macvtap16(q, skb_headlen(skb));
+		vnet_hdr->gso_size = cpu_to_macvtap16(q, sinfo->gso_size);
 		if (sinfo->gso_type & SKB_GSO_TCPV4)
 			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 		else if (sinfo->gso_type & SKB_GSO_TCPV6)
@@ -628,10 +642,13 @@ static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		vnet_hdr->csum_start = skb_checksum_start_offset(skb);
 		if (vlan_tx_tag_present(skb))
-			vnet_hdr->csum_start += VLAN_HLEN;
-		vnet_hdr->csum_offset = skb->csum_offset;
+			vnet_hdr->csum_start = cpu_to_macvtap16(q,
+				skb_checksum_start_offset(skb) + VLAN_HLEN);
+		else
+			vnet_hdr->csum_start = cpu_to_macvtap16(q,
+				skb_checksum_start_offset(skb));
+		vnet_hdr->csum_offset = cpu_to_macvtap16(q, skb->csum_offset);
 	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 		vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
 	} /* else everything is zero */
@@ -666,12 +683,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 		if (err < 0)
 			goto err;
 		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
-		     vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
-							vnet_hdr.hdr_len)
-			vnet_hdr.hdr_len = vnet_hdr.csum_start +
-						vnet_hdr.csum_offset + 2;
+		     macvtap16_to_cpu(q, vnet_hdr.csum_start) +
+		     macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2 >
+			     macvtap16_to_cpu(q, vnet_hdr.hdr_len))
+			vnet_hdr.hdr_len = cpu_to_macvtap16(q,
+				 macvtap16_to_cpu(q, vnet_hdr.csum_start) +
+				 macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2);
 		err = -EINVAL;
-		if (vnet_hdr.hdr_len > len)
+		if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > len)
 			goto err;
 	}
 
@@ -684,7 +703,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 		goto err;
 
 	if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
-		copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN;
+		copylen = vnet_hdr.hdr_len ?
+			macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
 		if (copylen > good_linear)
 			copylen = good_linear;
 		linear = copylen;
@@ -695,10 +715,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 
 	if (!zerocopy) {
 		copylen = len;
-		if (vnet_hdr.hdr_len > good_linear)
+		if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > good_linear)
 			linear = good_linear;
 		else
-			linear = vnet_hdr.hdr_len;
+			linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len);
 	}
 
 	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
@@ -725,7 +745,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	skb->protocol = eth_hdr(skb)->h_proto;
 
 	if (vnet_hdr_len) {
-		err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr);
+		err = macvtap_skb_from_vnet_hdr(q, skb, &vnet_hdr);
 		if (err)
 			goto err_kfree;
 	}
@@ -791,7 +811,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
 		if ((len -= vnet_hdr_len) < 0)
 			return -EINVAL;
 
-		macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
+		macvtap_skb_to_vnet_hdr(q, skb, &vnet_hdr);
 
 		if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
 			return -EFAULT;
@@ -1003,8 +1023,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 			return -EFAULT;
 
 		ret = 0;
-		if ((u & ~(IFF_VNET_HDR | IFF_MULTI_QUEUE)) !=
-		    (IFF_NO_PI | IFF_TAP))
+		if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP))
 			ret = -EINVAL;
 		else
 			q->flags = u;
@@ -1036,8 +1055,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 		return ret;
 
 	case TUNGETFEATURES:
-		if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR |
-			     IFF_MULTI_QUEUE, up))
+		if (put_user(IFF_TAP | IFF_NO_PI | MACVTAP_FEATURES, up))
 			return -EFAULT;
 		return 0;
 
-- 
MST

^ permalink raw reply related

* [PATCH v6 40/46] tun: TUN_VNET_LE support, fix sparse warnings for virtio headers
From: Michael S. Tsirkin @ 2014-11-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	Jason Wang, Zhi Yong Wu, Ben Hutchings, Tom Herbert,
	Masatake YAMATO, Herbert Xu, Xi Wang, netdev
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

Pretty straight-forward: convert all fields to/from
virtio endian-ness.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/tun.c | 48 +++++++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 61c000c..f411ffd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -111,7 +111,7 @@ do {								\
 #define TUN_FASYNC	IFF_ATTACH_QUEUE
 
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
-		      IFF_MULTI_QUEUE)
+		      IFF_VNET_LE | IFF_MULTI_QUEUE)
 #define GOODCOPY_LEN 128
 
 #define FLT_EXACT_COUNT 8
@@ -205,6 +205,16 @@ struct tun_struct {
 	u32 flow_count;
 };
 
+static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
+{
+	return __virtio16_to_cpu(tun->flags & IFF_VNET_LE, val);
+}
+
+static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
+{
+	return __cpu_to_virtio16(tun->flags & IFF_VNET_LE, val);
+}
+
 static inline u32 tun_hashfn(u32 rxhash)
 {
 	return rxhash & 0x3ff;
@@ -1053,10 +1063,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 			return -EFAULT;
 
 		if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
-		    gso.csum_start + gso.csum_offset + 2 > gso.hdr_len)
-			gso.hdr_len = gso.csum_start + gso.csum_offset + 2;
+		    tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
+			gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
 
-		if (gso.hdr_len > len)
+		if (tun16_to_cpu(tun, gso.hdr_len) > len)
 			return -EINVAL;
 		offset += tun->vnet_hdr_sz;
 	}
@@ -1064,7 +1074,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
 		align += NET_IP_ALIGN;
 		if (unlikely(len < ETH_HLEN ||
-			     (gso.hdr_len && gso.hdr_len < ETH_HLEN)))
+			     (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
 			return -EINVAL;
 	}
 
@@ -1075,7 +1085,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		 * enough room for skb expand head in case it is used.
 		 * The rest of the buffer is mapped from userspace.
 		 */
-		copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN;
+		copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
 		if (copylen > good_linear)
 			copylen = good_linear;
 		linear = copylen;
@@ -1085,10 +1095,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
 	if (!zerocopy) {
 		copylen = len;
-		if (gso.hdr_len > good_linear)
+		if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
 			linear = good_linear;
 		else
-			linear = gso.hdr_len;
+			linear = tun16_to_cpu(tun, gso.hdr_len);
 	}
 
 	skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
@@ -1115,8 +1125,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-		if (!skb_partial_csum_set(skb, gso.csum_start,
-					  gso.csum_offset)) {
+		if (!skb_partial_csum_set(skb, tun16_to_cpu(tun, gso.csum_start),
+					  tun16_to_cpu(tun, gso.csum_offset))) {
 			tun->dev->stats.rx_frame_errors++;
 			kfree_skb(skb);
 			return -EINVAL;
@@ -1184,7 +1194,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
-		skb_shinfo(skb)->gso_size = gso.gso_size;
+		skb_shinfo(skb)->gso_size = tun16_to_cpu(tun, gso.gso_size);
 		if (skb_shinfo(skb)->gso_size == 0) {
 			tun->dev->stats.rx_frame_errors++;
 			kfree_skb(skb);
@@ -1276,8 +1286,8 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 			struct skb_shared_info *sinfo = skb_shinfo(skb);
 
 			/* This is a hint as to how much should be linear. */
-			gso.hdr_len = skb_headlen(skb);
-			gso.gso_size = sinfo->gso_size;
+			gso.hdr_len = cpu_to_tun16(tun, skb_headlen(skb));
+			gso.gso_size = cpu_to_tun16(tun, sinfo->gso_size);
 			if (sinfo->gso_type & SKB_GSO_TCPV4)
 				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 			else if (sinfo->gso_type & SKB_GSO_TCPV6)
@@ -1285,12 +1295,12 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 			else {
 				pr_err("unexpected GSO type: "
 				       "0x%x, gso_size %d, hdr_len %d\n",
-				       sinfo->gso_type, gso.gso_size,
-				       gso.hdr_len);
+				       sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
+				       tun16_to_cpu(tun, gso.hdr_len));
 				print_hex_dump(KERN_ERR, "tun: ",
 					       DUMP_PREFIX_NONE,
 					       16, 1, skb->head,
-					       min((int)gso.hdr_len, 64), true);
+					       min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
 				WARN_ON_ONCE(1);
 				return -EINVAL;
 			}
@@ -1301,9 +1311,9 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-			gso.csum_start = skb_checksum_start_offset(skb) +
-					 vlan_hlen;
-			gso.csum_offset = skb->csum_offset;
+			gso.csum_start = cpu_to_tun16(tun, skb_checksum_start_offset(skb) +
+						      vlan_hlen);
+			gso.csum_offset = cpu_to_tun16(tun, skb->csum_offset);
 		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 			gso.flags = VIRTIO_NET_HDR_F_DATA_VALID;
 		} /* else everything is zero */
-- 
MST

^ permalink raw reply related

* [PATCH v6 39/46] tun: add VNET_LE flag
From: Michael S. Tsirkin @ 2014-11-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	netdev, linux-api
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

virtio 1.0 modified the virtio net header format,
making all fields little endian.

Users can tweak the header format before submitting it to tun,
but this means more data copies where none were necessary.
And if the iovec is in RO memory, we might need to split
the iovec, which in turn means we might in theory overflow
the iovec max size.

This patch adds a simpler way for applications to handle this,
using a new "little endian" flag in tun.
As a result, tun simply byte-swaps header fields as appropriate.
This is a NOP on LE architectures.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/if_tun.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 277a260..18b2403 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -57,6 +57,7 @@
 #define IFF_ONE_QUEUE	0x2000
 #define IFF_VNET_HDR	0x4000
 #define IFF_TUN_EXCL	0x8000
+#define IFF_VNET_LE	0x10000
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400
-- 
MST

^ permalink raw reply related

* [PATCH v6 38/46] tun: drop most type defines
From: Michael S. Tsirkin @ 2014-11-27 20:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	Jason Wang, Zhi Yong Wu, Tom Herbert, Ben Hutchings,
	Masatake YAMATO, Herbert Xu, Xi Wang, netdev
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

It's just as easy to use IFF_ flags directly,
there's no point in adding our own defines.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/tun.c | 62 +++++++++++++++++++++++++------------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bc89d07..61c000c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,12 +109,6 @@ do {								\
  * overload it to mean fasync when stored there.
  */
 #define TUN_FASYNC	IFF_ATTACH_QUEUE
-#define TUN_NO_PI	IFF_NO_PI
-/* This flag has no real effect */
-#define TUN_ONE_QUEUE	IFF_ONE_QUEUE
-#define TUN_PERSIST 	IFF_PERSIST
-#define TUN_VNET_HDR 	IFF_VNET_HDR
-#define TUN_TAP_MQ      IFF_MULTI_QUEUE
 
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
 		      IFF_MULTI_QUEUE)
@@ -487,7 +481,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 		if (tun && tun->numqueues == 0 && tun->numdisabled == 0) {
 			netif_carrier_off(tun->dev);
 
-			if (!(tun->flags & TUN_PERSIST) &&
+			if (!(tun->flags & IFF_PERSIST) &&
 			    tun->dev->reg_state == NETREG_REGISTERED)
 				unregister_netdevice(tun->dev);
 		}
@@ -538,7 +532,7 @@ static void tun_detach_all(struct net_device *dev)
 	}
 	BUG_ON(tun->numdisabled != 0);
 
-	if (tun->flags & TUN_PERSIST)
+	if (tun->flags & IFF_PERSIST)
 		module_put(THIS_MODULE);
 }
 
@@ -556,7 +550,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
 		goto out;
 
 	err = -EBUSY;
-	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
+	if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1)
 		goto out;
 
 	err = -E2BIG;
@@ -935,7 +929,7 @@ static void tun_net_init(struct net_device *dev)
 	struct tun_struct *tun = netdev_priv(dev);
 
 	switch (tun->flags & TUN_TYPE_MASK) {
-	case TUN_TUN_DEV:
+	case IFF_TUN:
 		dev->netdev_ops = &tun_netdev_ops;
 
 		/* Point-to-Point TUN Device */
@@ -949,7 +943,7 @@ static void tun_net_init(struct net_device *dev)
 		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 		break;
 
-	case TUN_TAP_DEV:
+	case IFF_TAP:
 		dev->netdev_ops = &tap_netdev_ops;
 		/* Ethernet TAP Device */
 		ether_setup(dev);
@@ -1040,7 +1034,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	int err;
 	u32 rxhash;
 
-	if (!(tun->flags & TUN_NO_PI)) {
+	if (!(tun->flags & IFF_NO_PI)) {
 		if (len < sizeof(pi))
 			return -EINVAL;
 		len -= sizeof(pi);
@@ -1050,7 +1044,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		offset += sizeof(pi);
 	}
 
-	if (tun->flags & TUN_VNET_HDR) {
+	if (tun->flags & IFF_VNET_HDR) {
 		if (len < tun->vnet_hdr_sz)
 			return -EINVAL;
 		len -= tun->vnet_hdr_sz;
@@ -1067,7 +1061,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		offset += tun->vnet_hdr_sz;
 	}
 
-	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
+	if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
 		align += NET_IP_ALIGN;
 		if (unlikely(len < ETH_HLEN ||
 			     (gso.hdr_len && gso.hdr_len < ETH_HLEN)))
@@ -1130,8 +1124,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	switch (tun->flags & TUN_TYPE_MASK) {
-	case TUN_TUN_DEV:
-		if (tun->flags & TUN_NO_PI) {
+	case IFF_TUN:
+		if (tun->flags & IFF_NO_PI) {
 			switch (skb->data[0] & 0xf0) {
 			case 0x40:
 				pi.proto = htons(ETH_P_IP);
@@ -1150,7 +1144,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		skb->protocol = pi.proto;
 		skb->dev = tun->dev;
 		break;
-	case TUN_TAP_DEV:
+	case IFF_TAP:
 		skb->protocol = eth_type_trans(skb, tun->dev);
 		break;
 	}
@@ -1256,10 +1250,10 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 	if (vlan_tx_tag_present(skb))
 		vlan_hlen = VLAN_HLEN;
 
-	if (tun->flags & TUN_VNET_HDR)
+	if (tun->flags & IFF_VNET_HDR)
 		vnet_hdr_sz = tun->vnet_hdr_sz;
 
-	if (!(tun->flags & TUN_NO_PI)) {
+	if (!(tun->flags & IFF_NO_PI)) {
 		if ((len -= sizeof(pi)) < 0)
 			return -EINVAL;
 
@@ -1592,7 +1586,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			return -EINVAL;
 
 		if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) !=
-		    !!(tun->flags & TUN_TAP_MQ))
+		    !!(tun->flags & IFF_MULTI_QUEUE))
 			return -EINVAL;
 
 		if (tun_not_capable(tun))
@@ -1605,7 +1599,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		if (err < 0)
 			return err;
 
-		if (tun->flags & TUN_TAP_MQ &&
+		if (tun->flags & IFF_MULTI_QUEUE &&
 		    (tun->numqueues + tun->numdisabled > 1)) {
 			/* One or more queue has already been attached, no need
 			 * to initialize the device again.
@@ -1628,11 +1622,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		/* Set dev type */
 		if (ifr->ifr_flags & IFF_TUN) {
 			/* TUN device */
-			flags |= TUN_TUN_DEV;
+			flags |= IFF_TUN;
 			name = "tun%d";
 		} else if (ifr->ifr_flags & IFF_TAP) {
 			/* TAP device */
-			flags |= TUN_TAP_DEV;
+			flags |= IFF_TAP;
 			name = "tap%d";
 		} else
 			return -EINVAL;
@@ -1825,7 +1819,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 		ret = tun_attach(tun, file, false);
 	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
 		tun = rtnl_dereference(tfile->tun);
-		if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached)
+		if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
 			ret = -EINVAL;
 		else
 			__tun_detach(tfile, false);
@@ -1931,12 +1925,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		/* Disable/Enable persist mode. Keep an extra reference to the
 		 * module to prevent the module being unprobed.
 		 */
-		if (arg && !(tun->flags & TUN_PERSIST)) {
-			tun->flags |= TUN_PERSIST;
+		if (arg && !(tun->flags & IFF_PERSIST)) {
+			tun->flags |= IFF_PERSIST;
 			__module_get(THIS_MODULE);
 		}
-		if (!arg && (tun->flags & TUN_PERSIST)) {
-			tun->flags &= ~TUN_PERSIST;
+		if (!arg && (tun->flags & IFF_PERSIST)) {
+			tun->flags &= ~IFF_PERSIST;
 			module_put(THIS_MODULE);
 		}
 
@@ -1994,7 +1988,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	case TUNSETTXFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
 			break;
 		ret = update_filter(&tun->txflt, (void __user *)arg);
 		break;
@@ -2053,7 +2047,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	case TUNATTACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
 			break;
 		ret = -EFAULT;
 		if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
@@ -2065,7 +2059,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	case TUNDETACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
 			break;
 		ret = 0;
 		tun_detach_filter(tun, tun->numqueues);
@@ -2073,7 +2067,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 
 	case TUNGETFILTER:
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
 			break;
 		ret = -EFAULT;
 		if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog)))
@@ -2266,10 +2260,10 @@ static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 
 	switch (tun->flags & TUN_TYPE_MASK) {
-	case TUN_TUN_DEV:
+	case IFF_TUN:
 		strlcpy(info->bus_info, "tun", sizeof(info->bus_info));
 		break;
-	case TUN_TAP_DEV:
+	case IFF_TAP:
 		strlcpy(info->bus_info, "tap", sizeof(info->bus_info));
 		break;
 	}
-- 
MST

^ permalink raw reply related

* [PATCH v6 37/46] tun: move internal flag defines out of uapi
From: Michael S. Tsirkin @ 2014-11-27 20:10 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, thuth, dahi,
	Jason Wang, Zhi Yong Wu, Ben Hutchings, Herbert Xu, Tom Herbert,
	Masatake YAMATO, Xi Wang, netdev, linux-api
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

TUN_ flags are internal and never exposed
to userspace. Any application using them is almost
certainly buggy.

Move them out to tun.c; we'll remove them in follow-up patches.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/if_tun.h | 16 ++--------
 drivers/net/tun.c           | 74 ++++++++++++++-------------------------------
 2 files changed, 26 insertions(+), 64 deletions(-)

diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..277a260 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -22,21 +22,11 @@
 
 /* Read queue size */
 #define TUN_READQ_SIZE	500
-
-/* TUN device flags */
-#define TUN_TUN_DEV 	0x0001	
-#define TUN_TAP_DEV	0x0002
+/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
+#define TUN_TUN_DEV 	IFF_TUN
+#define TUN_TAP_DEV	IFF_TAP
 #define TUN_TYPE_MASK   0x000f
 
-#define TUN_FASYNC	0x0010
-#define TUN_NOCHECKSUM	0x0020
-#define TUN_NO_PI	0x0040
-/* This flag has no real effect */
-#define TUN_ONE_QUEUE	0x0080
-#define TUN_PERSIST 	0x0100	
-#define TUN_VNET_HDR 	0x0200
-#define TUN_TAP_MQ      0x0400
-
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
 #define TUNSETDEBUG   _IOW('T', 201, int) 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9dd3746..bc89d07 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -103,6 +103,21 @@ do {								\
 } while (0)
 #endif
 
+/* TUN device flags */
+
+/* IFF_ATTACH_QUEUE is never stored in device flags,
+ * overload it to mean fasync when stored there.
+ */
+#define TUN_FASYNC	IFF_ATTACH_QUEUE
+#define TUN_NO_PI	IFF_NO_PI
+/* This flag has no real effect */
+#define TUN_ONE_QUEUE	IFF_ONE_QUEUE
+#define TUN_PERSIST 	IFF_PERSIST
+#define TUN_VNET_HDR 	IFF_VNET_HDR
+#define TUN_TAP_MQ      IFF_MULTI_QUEUE
+
+#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
+		      IFF_MULTI_QUEUE)
 #define GOODCOPY_LEN 128
 
 #define FLT_EXACT_COUNT 8
@@ -1521,32 +1536,7 @@ static struct proto tun_proto = {
 
 static int tun_flags(struct tun_struct *tun)
 {
-	int flags = 0;
-
-	if (tun->flags & TUN_TUN_DEV)
-		flags |= IFF_TUN;
-	else
-		flags |= IFF_TAP;
-
-	if (tun->flags & TUN_NO_PI)
-		flags |= IFF_NO_PI;
-
-	/* This flag has no real effect.  We track the value for backwards
-	 * compatibility.
-	 */
-	if (tun->flags & TUN_ONE_QUEUE)
-		flags |= IFF_ONE_QUEUE;
-
-	if (tun->flags & TUN_VNET_HDR)
-		flags |= IFF_VNET_HDR;
-
-	if (tun->flags & TUN_TAP_MQ)
-		flags |= IFF_MULTI_QUEUE;
-
-	if (tun->flags & TUN_PERSIST)
-		flags |= IFF_PERSIST;
-
-	return flags;
+	return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
 }
 
 static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr,
@@ -1706,28 +1696,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
 
-	if (ifr->ifr_flags & IFF_NO_PI)
-		tun->flags |= TUN_NO_PI;
-	else
-		tun->flags &= ~TUN_NO_PI;
-
-	/* This flag has no real effect.  We track the value for backwards
-	 * compatibility.
-	 */
-	if (ifr->ifr_flags & IFF_ONE_QUEUE)
-		tun->flags |= TUN_ONE_QUEUE;
-	else
-		tun->flags &= ~TUN_ONE_QUEUE;
-
-	if (ifr->ifr_flags & IFF_VNET_HDR)
-		tun->flags |= TUN_VNET_HDR;
-	else
-		tun->flags &= ~TUN_VNET_HDR;
-
-	if (ifr->ifr_flags & IFF_MULTI_QUEUE)
-		tun->flags |= TUN_TAP_MQ;
-	else
-		tun->flags &= ~TUN_TAP_MQ;
+	tun->flags = (tun->flags & ~TUN_FEATURES) |
+		(ifr->ifr_flags & TUN_FEATURES);
 
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
@@ -1890,9 +1860,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	if (cmd == TUNGETFEATURES) {
 		/* Currently this just means: "what IFF flags are valid?".
 		 * This is needed because we never checked for invalid flags on
-		 * TUNSETIFF. */
-		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-				IFF_VNET_HDR | IFF_MULTI_QUEUE,
+		 * TUNSETIFF.  Why do we report IFF_TUN and IFF_TAP which are
+		 * not legal for TUNSETIFF here?  It's probably a bug, but it
+		 * doesn't seem to be worth fixing.
+		 */
+		return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
 				(unsigned int __user*)argp);
 	} else if (cmd == TUNSETQUEUE)
 		return tun_set_queue(file, &ifr);
-- 
MST

^ permalink raw reply related

* [PATCH v6 36/46] vhost/net: suppress compiler warning
From: Michael S. Tsirkin @ 2014-11-27 20:10 UTC (permalink / raw)
  To: linux-kernel
  Cc: thuth, kvm, rusty, netdev, virtualization, dahi, pbonzini,
	David Miller
In-Reply-To: <1417118789-18231-1-git-send-email-mst@redhat.com>

len is always initialized since the function is called with size > 0.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 984242e..54ffbb0 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -501,7 +501,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 	int headcount = 0;
 	unsigned d;
 	int r, nlogs = 0;
-	u32 len;
+	u32 uninitialized_var(len);
 
 	while (datalen > 0 && headcount < quota) {
 		if (unlikely(seg >= UIO_MAXIOV)) {
-- 
MST

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox