Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 0/3] bonding: patchset for rcu use in bonding
From: Ding Tianhong @ 2013-10-15  8:28 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

Hi:

This patch set converts the xmit paths of the 3ad and alb modes to use the RCU lock.
Add rtnl lock and remove read lock for bond sysfs.

v2: using bond_for_each_slave_rcu() without rcu_read_lock() triggers a warning, so
add a new function for the alb xmit path to avoid the warning.

Ding Tianhong (3):
Wang Yufen (1):
Yang Yingliang (1):
  bonding: use RCU protection for 3ad xmit path
  bonding: use RCU protection for alb xmit path
  bonding: add rtnl lock and remove read lock for bond sysfs

 drivers/net/bonding/bond_3ad.c   | 10 +++----
 drivers/net/bonding/bond_alb.c   | 58 +++++++++++++++++++++++++++++-----------
 drivers/net/bonding/bond_sysfs.c | 30 ++++++++++++---------
 drivers/net/bonding/bonding.h    | 14 ++++++++++
 4 files changed, 78 insertions(+), 34 deletions(-)

-- 
1.8.2.1

^ permalink raw reply

* [PATCH net-next v2 3/3] bonding: add rtnl lock and remove read lock for bond sysfs
From: Ding Tianhong @ 2013-10-15  8:28 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

bond_for_each_slave() will no longer be protected by read_lock(),
only by rtnl_lock(), so we need to replace read_lock()
with rtnl_lock().

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_sysfs.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e06c644..2ba1114 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -179,7 +179,9 @@ static ssize_t bonding_show_slaves(struct device *d,
 	struct slave *slave;
 	int res = 0;
 
-	read_lock(&bond->lock);
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	bond_for_each_slave(bond, slave, iter) {
 		if (res > (PAGE_SIZE - IFNAMSIZ)) {
 			/* not enough space for another interface name */
@@ -190,7 +192,9 @@ static ssize_t bonding_show_slaves(struct device *d,
 		}
 		res += sprintf(buf + res, "%s ", slave->dev->name);
 	}
-	read_unlock(&bond->lock);
+
+	rtnl_unlock();
+
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
 
@@ -628,6 +632,9 @@ static ssize_t bonding_store_arp_targets(struct device *d,
 	unsigned long *targets_rx;
 	int ind, i, j, ret = -EINVAL;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	targets = bond->params.arp_targets;
 	newtarget = in_aton(buf + 1);
 	/* look for adds */
@@ -701,6 +708,7 @@ static ssize_t bonding_store_arp_targets(struct device *d,
 
 	ret = count;
 out:
+	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
@@ -1469,7 +1477,6 @@ static ssize_t bonding_show_queue_id(struct device *d,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
 			/* not enough space for another interface_name:queue_id pair */
@@ -1481,9 +1488,9 @@ static ssize_t bonding_show_queue_id(struct device *d,
 		res += sprintf(buf + res, "%s:%d ",
 			       slave->dev->name, slave->queue_id);
 	}
-	read_unlock(&bond->lock);
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
+
 	rtnl_unlock();
 
 	return res;
@@ -1532,8 +1539,6 @@ static ssize_t bonding_store_queue_id(struct device *d,
 	if (!sdev)
 		goto err_no_cmd;
 
-	read_lock(&bond->lock);
-
 	/* Search for thes slave and check for duplicate qids */
 	update_slave = NULL;
 	bond_for_each_slave(bond, slave, iter) {
@@ -1544,23 +1549,20 @@ static ssize_t bonding_store_queue_id(struct device *d,
 			 */
 			update_slave = slave;
 		else if (qid && qid == slave->queue_id) {
-			goto err_no_cmd_unlock;
+			goto err_no_cmd;
 		}
 	}
 
 	if (!update_slave)
-		goto err_no_cmd_unlock;
+		goto err_no_cmd;
 
 	/* Actually set the qids for the slave */
 	update_slave->queue_id = qid;
 
-	read_unlock(&bond->lock);
 out:
 	rtnl_unlock();
 	return ret;
 
-err_no_cmd_unlock:
-	read_unlock(&bond->lock);
 err_no_cmd:
 	pr_info("invalid input for queue_id set for %s.\n",
 		bond->dev->name);
@@ -1593,6 +1595,9 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 	struct list_head *iter;
 	struct slave *slave;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	if (sscanf(buf, "%d", &new_value) != 1) {
 		pr_err("%s: no all_slaves_active value specified.\n",
 		       bond->dev->name);
@@ -1612,7 +1617,6 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 		goto out;
 	}
 
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (!bond_is_active_slave(slave)) {
 			if (new_value)
@@ -1621,8 +1625,8 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 				slave->inactive = 1;
 		}
 	}
-	read_unlock(&bond->lock);
 out:
+	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
-- 
1.8.2.1

^ permalink raw reply related

* Re: [PATCH RFC 5/5] net: macb: Adjust tx_clk when link speed changes
From: Michal Simek @ 2013-10-15  7:58 UTC (permalink / raw)
  To: Nicolas Ferre
  Cc: Soren Brinkmann, netdev, David Miller, linux-kernel, Michal Simek
In-Reply-To: <525CF4AD.1070304@atmel.com>

On 10/15/2013 09:54 AM, Nicolas Ferre wrote:
> On 15/10/2013 01:59, Soren Brinkmann :
>> Adjust the ethernet clock according to the negotiated link speed.
>>
>> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
> 
> I will need more time to study this one.
> 
> Moreover, I will have to add the "tx_clk" to every user of this driver before switching to the addition of this clock.

As I am reading this patch, Soren just protected this
case that if this clk is not specified then it is not used.

But anyway feel free to take more time to study it.

If there is a device-tree binding, then it should be extended
with this optional value.

Thanks,
Michal

^ permalink raw reply

* Re: [PATCH RFC 5/5] net: macb: Adjust tx_clk when link speed changes
From: Nicolas Ferre @ 2013-10-15  7:54 UTC (permalink / raw)
  To: Soren Brinkmann, netdev, David Miller; +Cc: linux-kernel, Michal Simek
In-Reply-To: <1381795140-10792-6-git-send-email-soren.brinkmann@xilinx.com>

On 15/10/2013 01:59, Soren Brinkmann :
> Adjust the ethernet clock according to the negotiated link speed.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>

I will need more time to study this one.

Moreover, I will have to add the "tx_clk" to every user of this driver
before switching to the addition of this clock.

Best regards,

> ---
>   drivers/net/ethernet/cadence/macb.c | 66 +++++++++++++++++++++++++++++++++++++
>   drivers/net/ethernet/cadence/macb.h |  1 +
>   2 files changed, 67 insertions(+)
>
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 603844b1d483..beb9fa863811 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -204,6 +204,49 @@ static int macb_mdio_reset(struct mii_bus *bus)
>   	return 0;
>   }
>
> +/**
> + * macb_set_tx_clk() - Set a clock to a new frequency
> + * @clk		Pointer to the clock to change
> + * @rate	New frequency in Hz
> + * @dev		Pointer to the struct net_device
> + */
> +static void macb_set_tx_clk(struct clk *clk, int speed, struct net_device *dev)
> +{
> +	long ferr;
> +	long rate;
> +	long rate_rounded;
> +
> +	switch (speed) {
> +	case SPEED_10:
> +		rate = 2500000;
> +		break;
> +	case SPEED_100:
> +		rate = 25000000;
> +		break;
> +	case SPEED_1000:
> +		rate = 125000000;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	rate_rounded = clk_round_rate(clk, rate);
> +	if (rate_rounded < 0)
> +		return;
> +
> +	/* RGMII allows 50 ppm frequency error. Test and warn if this limit
> +	 * are not satisfied.
> +	 */
> +	ferr = abs(rate_rounded - rate);
> +	ferr = DIV_ROUND_UP(ferr, rate / 100000);
> +	if (ferr > 5)
> +		netdev_warn(dev, "unable to generate target frequency: %ld Hz\n",
> +				rate);
> +
> +	if (clk_set_rate(clk, rate_rounded))
> +		netdev_err(dev, "adjusting tx_clk failed.\n");
> +}
> +
>   static void macb_handle_link_change(struct net_device *dev)
>   {
>   	struct macb *bp = netdev_priv(dev);
> @@ -251,6 +294,9 @@ static void macb_handle_link_change(struct net_device *dev)
>
>   	spin_unlock_irqrestore(&bp->lock, flags);
>
> +	if (!IS_ERR(bp->tx_clk))
> +		macb_set_tx_clk(bp->tx_clk, phydev->speed, dev);
> +
>   	if (status_change) {
>   		if (phydev->link) {
>   			netif_carrier_on(dev);
> @@ -1805,6 +1851,8 @@ static int __init macb_probe(struct platform_device *pdev)
>   		goto err_out_free_dev;
>   	}
>
> +	bp->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
> +
>   	err = clk_prepare_enable(bp->pclk);
>   	if (err) {
>   		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
> @@ -1817,6 +1865,15 @@ static int __init macb_probe(struct platform_device *pdev)
>   		goto err_out_disable_pclk;
>   	}
>
> +	if (!IS_ERR(bp->tx_clk)) {
> +		err = clk_prepare_enable(bp->tx_clk);
> +		if (err) {
> +			dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n",
> +					err);
> +			goto err_out_disable_hclk;
> +		}
> +	}
> +
>   	bp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
>   	if (!bp->regs) {
>   		dev_err(&pdev->dev, "failed to map registers, aborting.\n");
> @@ -1917,6 +1974,9 @@ static int __init macb_probe(struct platform_device *pdev)
>   err_out_unregister_netdev:
>   	unregister_netdev(dev);
>   err_out_disable_clocks:
> +	if (!IS_ERR(bp->tx_clk))
> +		clk_disable_unprepare(bp->tx_clk);
> +err_out_disable_hclk:
>   	clk_disable_unprepare(bp->hclk);
>   err_out_disable_pclk:
>   	clk_disable_unprepare(bp->pclk);
> @@ -1941,6 +2001,8 @@ static int __exit macb_remove(struct platform_device *pdev)
>   		kfree(bp->mii_bus->irq);
>   		mdiobus_free(bp->mii_bus);
>   		unregister_netdev(dev);
> +		if (!IS_ERR(bp->tx_clk))
> +			clk_disable_unprepare(bp->tx_clk);
>   		clk_disable_unprepare(bp->hclk);
>   		clk_disable_unprepare(bp->pclk);
>   		free_netdev(dev);
> @@ -1959,6 +2021,8 @@ static int macb_suspend(struct device *dev)
>   	netif_carrier_off(netdev);
>   	netif_device_detach(netdev);
>
> +	if (!IS_ERR(bp->tx_clk))
> +		clk_disable_unprepare(bp->tx_clk);
>   	clk_disable_unprepare(bp->hclk);
>   	clk_disable_unprepare(bp->pclk);
>
> @@ -1973,6 +2037,8 @@ static int macb_resume(struct device *dev)
>
>   	clk_prepare_enable(bp->pclk);
>   	clk_prepare_enable(bp->hclk);
> +	if (!IS_ERR(bp->tx_clk))
> +		clk_prepare_enable(bp->tx_clk);
>
>   	netif_device_attach(netdev);
>
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index f4076155bed7..51c02442160a 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -572,6 +572,7 @@ struct macb {
>   	struct platform_device	*pdev;
>   	struct clk		*pclk;
>   	struct clk		*hclk;
> +	struct clk		*tx_clk;
>   	struct net_device	*dev;
>   	struct napi_struct	napi;
>   	struct work_struct	tx_error_task;
>


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH RFC 4/5] net: macb: Use devm_request_irq()
From: Nicolas Ferre @ 2013-10-15  7:46 UTC (permalink / raw)
  To: Soren Brinkmann, netdev, David Miller; +Cc: linux-kernel, Michal Simek
In-Reply-To: <1381795140-10792-5-git-send-email-soren.brinkmann@xilinx.com>

On 15/10/2013 01:58, Soren Brinkmann :
> Use the device managed interface to request the IRQ, simplifying error
> paths.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>

> ---
>   drivers/net/ethernet/cadence/macb.c | 8 +++-----
>   1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 436aecc31732..603844b1d483 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -1825,7 +1825,8 @@ static int __init macb_probe(struct platform_device *pdev)
>   	}
>
>   	dev->irq = platform_get_irq(pdev, 0);
> -	err = request_irq(dev->irq, macb_interrupt, 0, dev->name, dev);
> +	err = devm_request_irq(&pdev->dev, dev->irq, macb_interrupt, 0,
> +			dev->name, dev);
>   	if (err) {
>   		dev_err(&pdev->dev, "Unable to request IRQ %d (error %d)\n",
>   			dev->irq, err);
> @@ -1892,7 +1893,7 @@ static int __init macb_probe(struct platform_device *pdev)
>   	err = register_netdev(dev);
>   	if (err) {
>   		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
> -		goto err_out_free_irq;
> +		goto err_out_disable_clocks;
>   	}
>
>   	err = macb_mii_init(bp);
> @@ -1915,8 +1916,6 @@ static int __init macb_probe(struct platform_device *pdev)
>
>   err_out_unregister_netdev:
>   	unregister_netdev(dev);
> -err_out_free_irq:
> -	free_irq(dev->irq, dev);
>   err_out_disable_clocks:
>   	clk_disable_unprepare(bp->hclk);
>   err_out_disable_pclk:
> @@ -1942,7 +1941,6 @@ static int __exit macb_remove(struct platform_device *pdev)
>   		kfree(bp->mii_bus->irq);
>   		mdiobus_free(bp->mii_bus);
>   		unregister_netdev(dev);
> -		free_irq(dev->irq, dev);
>   		clk_disable_unprepare(bp->hclk);
>   		clk_disable_unprepare(bp->pclk);
>   		free_netdev(dev);
>


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH RFC 3/5] net: macb: Use devm_ioremap()
From: Nicolas Ferre @ 2013-10-15  7:45 UTC (permalink / raw)
  To: Soren Brinkmann, netdev, David Miller; +Cc: linux-kernel, Michal Simek
In-Reply-To: <1381795140-10792-4-git-send-email-soren.brinkmann@xilinx.com>

On 15/10/2013 01:58, Soren Brinkmann :
> Use the device managed version of ioremap to remap IO memory,
> simplifying error paths.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>

> ---
>   drivers/net/ethernet/cadence/macb.c | 8 +++-----
>   1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 62aa136889a4..436aecc31732 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -17,6 +17,7 @@
>   #include <linux/circ_buf.h>
>   #include <linux/slab.h>
>   #include <linux/init.h>
> +#include <linux/io.h>
>   #include <linux/gpio.h>
>   #include <linux/interrupt.h>
>   #include <linux/netdevice.h>
> @@ -1816,7 +1817,7 @@ static int __init macb_probe(struct platform_device *pdev)
>   		goto err_out_disable_pclk;
>   	}
>
> -	bp->regs = ioremap(regs->start, resource_size(regs));
> +	bp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
>   	if (!bp->regs) {
>   		dev_err(&pdev->dev, "failed to map registers, aborting.\n");
>   		err = -ENOMEM;
> @@ -1828,7 +1829,7 @@ static int __init macb_probe(struct platform_device *pdev)
>   	if (err) {
>   		dev_err(&pdev->dev, "Unable to request IRQ %d (error %d)\n",
>   			dev->irq, err);
> -		goto err_out_iounmap;
> +		goto err_out_disable_clocks;
>   	}
>
>   	dev->netdev_ops = &macb_netdev_ops;
> @@ -1916,8 +1917,6 @@ err_out_unregister_netdev:
>   	unregister_netdev(dev);
>   err_out_free_irq:
>   	free_irq(dev->irq, dev);
> -err_out_iounmap:
> -	iounmap(bp->regs);
>   err_out_disable_clocks:
>   	clk_disable_unprepare(bp->hclk);
>   err_out_disable_pclk:
> @@ -1944,7 +1943,6 @@ static int __exit macb_remove(struct platform_device *pdev)
>   		mdiobus_free(bp->mii_bus);
>   		unregister_netdev(dev);
>   		free_irq(dev->irq, dev);
> -		iounmap(bp->regs);
>   		clk_disable_unprepare(bp->hclk);
>   		clk_disable_unprepare(bp->pclk);
>   		free_netdev(dev);
>


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH RFC 2/5] net: macb: Migrate to devm clock interface
From: Nicolas Ferre @ 2013-10-15  7:44 UTC (permalink / raw)
  To: Soren Brinkmann, netdev, David Miller; +Cc: linux-kernel, Michal Simek
In-Reply-To: <1381795140-10792-3-git-send-email-soren.brinkmann@xilinx.com>

On 15/10/2013 01:58, Soren Brinkmann :
> Migrate to using the device managed interface for clocks and clean up
> the associated error paths.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>

> ---
>   drivers/net/ethernet/cadence/macb.c | 32 ++++++++++++++++++++------------
>   1 file changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 389ccf1362d5..62aa136889a4 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -1790,19 +1790,31 @@ static int __init macb_probe(struct platform_device *pdev)
>   	spin_lock_init(&bp->lock);
>   	INIT_WORK(&bp->tx_error_task, macb_tx_error_task);
>
> -	bp->pclk = clk_get(&pdev->dev, "pclk");
> +	bp->pclk = devm_clk_get(&pdev->dev, "pclk");
>   	if (IS_ERR(bp->pclk)) {
> -		dev_err(&pdev->dev, "failed to get macb_clk\n");
> +		err = PTR_ERR(bp->pclk);
> +		dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err);
>   		goto err_out_free_dev;
>   	}
> -	clk_prepare_enable(bp->pclk);
>
> -	bp->hclk = clk_get(&pdev->dev, "hclk");
> +	bp->hclk = devm_clk_get(&pdev->dev, "hclk");
>   	if (IS_ERR(bp->hclk)) {
> -		dev_err(&pdev->dev, "failed to get hclk\n");
> -		goto err_out_put_pclk;
> +		err = PTR_ERR(bp->hclk);
> +		dev_err(&pdev->dev, "failed to get hclk (%u)\n", err);
> +		goto err_out_free_dev;
> +	}
> +
> +	err = clk_prepare_enable(bp->pclk);
> +	if (err) {
> +		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
> +		goto err_out_free_dev;
> +	}
> +
> +	err = clk_prepare_enable(bp->hclk);
> +	if (err) {
> +		dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err);
> +		goto err_out_disable_pclk;
>   	}
> -	clk_prepare_enable(bp->hclk);
>
>   	bp->regs = ioremap(regs->start, resource_size(regs));
>   	if (!bp->regs) {
> @@ -1908,10 +1920,8 @@ err_out_iounmap:
>   	iounmap(bp->regs);
>   err_out_disable_clocks:
>   	clk_disable_unprepare(bp->hclk);
> -	clk_put(bp->hclk);
> +err_out_disable_pclk:
>   	clk_disable_unprepare(bp->pclk);
> -err_out_put_pclk:
> -	clk_put(bp->pclk);
>   err_out_free_dev:
>   	free_netdev(dev);
>   err_out:
> @@ -1936,9 +1946,7 @@ static int __exit macb_remove(struct platform_device *pdev)
>   		free_irq(dev->irq, dev);
>   		iounmap(bp->regs);
>   		clk_disable_unprepare(bp->hclk);
> -		clk_put(bp->hclk);
>   		clk_disable_unprepare(bp->pclk);
> -		clk_put(bp->pclk);
>   		free_netdev(dev);
>   	}
>
>


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH RFC 1/5] net: macb: Migrate to dev_pm_ops
From: Nicolas Ferre @ 2013-10-15  7:41 UTC (permalink / raw)
  To: Soren Brinkmann, netdev, David Miller; +Cc: linux-kernel, Michal Simek
In-Reply-To: <1381795140-10792-2-git-send-email-soren.brinkmann@xilinx.com>

On 15/10/2013 01:58, Soren Brinkmann :
> Migrate the suspend/resume functions to use the dev_pm_ops PM interface.
>
> Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>

> ---
>   drivers/net/ethernet/cadence/macb.c | 14 +++++++-------
>   1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 92578690f6de..389ccf1362d5 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -1946,8 +1946,9 @@ static int __exit macb_remove(struct platform_device *pdev)
>   }
>
>   #ifdef CONFIG_PM
> -static int macb_suspend(struct platform_device *pdev, pm_message_t state)
> +static int macb_suspend(struct device *dev)
>   {
> +	struct platform_device *pdev = to_platform_device(dev);
>   	struct net_device *netdev = platform_get_drvdata(pdev);
>   	struct macb *bp = netdev_priv(netdev);
>
> @@ -1960,8 +1961,9 @@ static int macb_suspend(struct platform_device *pdev, pm_message_t state)
>   	return 0;
>   }
>
> -static int macb_resume(struct platform_device *pdev)
> +static int macb_resume(struct device *dev)
>   {
> +	struct platform_device *pdev = to_platform_device(dev);
>   	struct net_device *netdev = platform_get_drvdata(pdev);
>   	struct macb *bp = netdev_priv(netdev);
>
> @@ -1972,19 +1974,17 @@ static int macb_resume(struct platform_device *pdev)
>
>   	return 0;
>   }
> -#else
> -#define macb_suspend	NULL
> -#define macb_resume	NULL
>   #endif
>
> +static SIMPLE_DEV_PM_OPS(macb_pm_ops, macb_suspend, macb_resume);
> +
>   static struct platform_driver macb_driver = {
>   	.remove		= __exit_p(macb_remove),
> -	.suspend	= macb_suspend,
> -	.resume		= macb_resume,
>   	.driver		= {
>   		.name		= "macb",
>   		.owner	= THIS_MODULE,
>   		.of_match_table	= of_match_ptr(macb_dt_ids),
> +		.pm	= &macb_pm_ops,
>   	},
>   };
>
>


-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH RFC 0/2] xfrm: Remove ancient sleeping code
From: Steffen Klassert @ 2013-10-15  7:30 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20131011.150124.527914076255487526.davem@davemloft.net>

On Fri, Oct 11, 2013 at 03:01:24PM -0400, David Miller wrote:
> From: Steffen Klassert <steffen.klassert@secunet.com>
> Date: Thu, 10 Oct 2013 08:33:01 +0200
> 
> > The two RFC patches to remove the sleeping code are in reply to this
> > mail. I'd add this to the ipsec-next tree if there are no objections.
> 
> The sleep path has the slight benefit that the TCP retransmit timers
> for the initial SYN packet will not be started until the IPSEC rule
> is resolved and the SYN actually goes out.

Yes, that's a slight advantage of the sleeping. But if the IPsec state
does not get resolved for whatever reason, the retransmit timer will
never start. The task will wake up but goes back to sleep immediately
because the needed state is not resolved.

> 
> With the packet queue, if the IPSEC resolution is slow then we'll have
> spurious SYN retransmits.
> 
> It makes no sense for TCP to keep queueing up SYNs if they will just
> all get stuck in the packet queue.  The first one is enough.

Right, that's why I've limited the queue to 100 packets. We can
queue the SYNs of up to 100 tcp connections that want to use
this IPsec state. It surely can happen that we queue multiple
retransmitted SYNs if the IPsec resolution is slow. But the
queueing code tries at least to get the packets out before
the first tcp retransmit. I think there is still room for
optimizations, maybe reducing the queue length or the queue
timeout to avoid queueing retransmitted SYNs as much as possible.

> 
> On the other hand we do want TCP to timeout, we do want the user to
> be able to "Ctrl-C" (ie. send a SIGINT) during a connect, etc.

As mentioned above, tcp does not time out if the state is not
getting resolved, and the task that tried to open the tcp
connection hangs indefinitely.

We could fiddle something to get a terminating condition if the
state is not resolved after some time, but my plan was to disable
the larval_drop sysctl by default some day again. At best without
any notable change to userspace. That's why I would prefer to
remove the sleeping entirely.

^ permalink raw reply

* Re: [PATCH] net: sh_eth: Fix RX packets errors on R8A7740
From: Guennadi Liakhovetski @ 2013-10-15  7:28 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: Nguyen Hong Ky, David S. Miller, netdev, Ryusuke Sakato,
	Simon Horman
In-Reply-To: <52597FC0.4090801@cogentembedded.com>

Hi Sergei

On Sat, 12 Oct 2013, Sergei Shtylyov wrote:

> Hello.
> 
> On 07-10-2013 8:29, Nguyen Hong Ky wrote:
> 
> > This patch will fix RX packets errors when receiving big size
> > of data by set bit RNC = 1.
> 
> > RNC - Receive Enable Control
> 
> > 0: Upon completion of reception of one frame, the E-DMAC writes
> > the receive status to the descriptor and clears the RR bit in
> > EDRRR to 0.
> 
> > 1: Upon completion of reception of one frame, the E-DMAC writes
> > (writes back) the receive status to the descriptor. In addition,
> > the E-DMAC reads the next descriptor and prepares for reception
> > of the next frame.
> 
> > In addition, for get more stable when receiving packets, I set
> > maximum size for the transmit/receive FIFO and inserts padding
> > in receive data.
> 
> > Signed-off-by: Nguyen Hong Ky <nh-ky@jinso.co.jp>
> > ---
> >   drivers/net/ethernet/renesas/sh_eth.c |    4 ++++
> >   1 files changed, 4 insertions(+), 0 deletions(-)
> 
> > diff --git a/drivers/net/ethernet/renesas/sh_eth.c
> > b/drivers/net/ethernet/renesas/sh_eth.c
> > index a753928..11d34f0 100644
> > --- a/drivers/net/ethernet/renesas/sh_eth.c
> > +++ b/drivers/net/ethernet/renesas/sh_eth.c
> > @@ -649,12 +649,16 @@ static struct sh_eth_cpu_data r8a7740_data = {
> >   	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
> >   			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
> >   			  EESR_TDE | EESR_ECI,
> > +	.fdr_value	= 0x0000070f,
> > +	.rmcr_value	= 0x00000001,
> > 
> >   	.apr		= 1,
> >   	.mpr		= 1,
> >   	.tpauser	= 1,
> >   	.bculr		= 1,
> >   	.hw_swap	= 1,
> > +	.rpadir		= 1,
> > +	.rpadir_value   = 2 << 16,
> >   	.no_trimd	= 1,
> >   	.no_ade		= 1,
> >   	.tsu		= 1,
> 
>    Guennadi, could you check if this patch fixes your issue with NFS. Make
> sure it applies to 'r8a7740_data' (it was misapplied to DaveM's tree).

Yes, the current -next, which includes this patch (in a slightly different 
form) boots fine over NFS for me.

Thanks
Guennadi
---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
http://www.open-technology.de/

^ permalink raw reply

* [PATCH net-next] SUNRPC: remove an unnecessary if statement
From: wangweidong @ 2013-10-15  3:44 UTC (permalink / raw)
  To: davem, Trond.Myklebust, bfields
  Cc: netdev, linux-nfs, linux-kernel, dingtianhong

If allocating req fails, just goto out_free; there is no need to check
'i < num_prealloc' afterwards. This is just a code simplification, with no
functional changes.

Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
---
 net/sunrpc/xprt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 095363e..a8e20de 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1087,11 +1087,9 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
 	for (i = 0; i < num_prealloc; i++) {
 		req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
 		if (!req)
-			break;
+			goto out_free;
 		list_add(&req->rq_list, &xprt->free);
 	}
-	if (i < num_prealloc)
-		goto out_free;
 	if (max_alloc > num_prealloc)
 		xprt->max_reqs = max_alloc;
 	else
-- 1.7.12

^ permalink raw reply related

* [PATCH net V2 2/2] virtio-net: refill only when device is up during setting queues
From: Jason Wang @ 2013-10-15  3:18 UTC (permalink / raw)
  To: mst, rusty, virtualization, netdev, linux-kernel
In-Reply-To: <1381807139-3450-1-git-send-email-jasowang@redhat.com>

We used to schedule the refill work unconditionally after changing the
number of queues. This may lead to an issue if the device is not
up. Since we only try to cancel the work in ndo_stop(), this may cause
the refill work to still run after removing the device. Fix this by only
scheduling the work when the device is up.

The bug was introduced by commit 9b9cd8024a2882e896c65222aa421d461354e3f2
(virtio-net: fix the race between channels setting and refill).

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from v1: add missing rtnl_lock() in virtnet_restore().
The patch is needed for 3.10 and above.
---
 drivers/net/virtio_net.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c4bc1cc..9fbdfcd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -938,7 +938,9 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 		return -EINVAL;
 	} else {
 		vi->curr_queue_pairs = queue_pairs;
-		schedule_delayed_work(&vi->refill, 0);
+		/* virtnet_open() will refill when device is going to up. */
+		if (dev->flags & IFF_UP)
+			schedule_delayed_work(&vi->refill, 0);
 	}
 
 	return 0;
@@ -1741,7 +1743,9 @@ static int virtnet_restore(struct virtio_device *vdev)
 	vi->config_enable = true;
 	mutex_unlock(&vi->config_lock);
 
+	rtnl_lock();
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
+	rtnl_unlock();
 
 	return 0;
 }
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH net V2 1/2] virtio-net: don't respond to cpu hotplug notifier if we're not ready
From: Jason Wang @ 2013-10-15  3:18 UTC (permalink / raw)
  To: mst, rusty, virtualization, netdev, linux-kernel

We're trying to re-configure the affinity unconditionally in the cpu hotplug
callback. This may lead to an issue when resuming from s3/s4 since

- virt queues haven't been allocated at that time.
- it's unnecessary since thaw method will re-configure the affinity.

Fix this issue by checking config_enable and doing nothing if we're not ready.

The bug was introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17
(virtio-net: reset virtqueue affinity when doing cpu hotplug).

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
The patch is needed for 3.8 and above.
---
 drivers/net/virtio_net.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index defec2b..c4bc1cc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1116,6 +1116,11 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 {
 	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
 
+	mutex_lock(&vi->config_lock);
+
+	if (!vi->config_enable)
+		goto done;
+
 	switch(action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
@@ -1128,6 +1133,9 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 	default:
 		break;
 	}
+
+done:
+	mutex_unlock(&vi->config_lock);
 	return NOTIFY_OK;
 }
 
-- 
1.8.1.2

^ permalink raw reply related

* Re: [PATCH net 2/2] virtio-net: refill only when device is up during setting queues
From: Jason Wang @ 2013-10-15  3:15 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization
In-Reply-To: <20131014110951.GG30954@redhat.com>

On 10/14/2013 07:09 PM, Michael S. Tsirkin wrote:
> On Mon, Oct 14, 2013 at 05:56:35PM +0800, Jason Wang wrote:
>> > We used to schedule the refill work unconditionally after changing the
>> > number of queues. This may lead an issue if the device is not
>> > up. Since we only try to cancel the work in ndo_stop(), this may cause
>> > the refill work still work after removing the device. Fix this by only
>> > schedule the work when device is up.
>> > 
>> > The bug were introduce by commit 9b9cd8024a2882e896c65222aa421d461354e3f2.
>> > (virtio-net: fix the race between channels setting and refill)
>> > 
>> > Cc: Rusty Russell <rusty@rustcorp.com.au>
>> > Cc: Michael S. Tsirkin <mst@redhat.com>
>> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> It bothers me that we look at the flag without any
> locks here.
> I think we'll need to take the rtnl lock at least
> on restore.
>

True, will post v2.

^ permalink raw reply

* Re: [PATCHv2 RESEND] {xfrm, sctp} Stick to software crc32 even if hardware is capable of that
From: Vlad Yasevich @ 2013-10-14 14:16 UTC (permalink / raw)
  To: Fan Du, Daniel Borkmann; +Cc: nhorman, steffen.klassert, davem, netdev
In-Reply-To: <525BAC6A.8000004@windriver.com>



Fan Du <fan.du@windriver.com> wrote:

>
>
>On 2013年10月14日 16:07, Daniel Borkmann wrote:
>> On 10/14/2013 09:27 AM, Fan Du wrote:
>>> igb/ixgbe have hardware sctp checksum support, when this feature is
>enabled
>>> and also IPsec is armed to protect sctp traffic, ugly things
>happened as
>>> xfrm_output checks CHECKSUM_PARTIAL to do check sum operation(sum
>every thing
>>> up and pack the 16bits result in the checksum field). The result is
>fail
>>> establishment of sctp communication.
>>>
>>> Signed-off-by: Fan Du <fan.du@windriver.com>
>>> Cc: Vlad Yasevich <vyasevich@gmail.com>
>>> Cc: Neil Horman <nhorman@tuxdriver.com>
>>> Cc: Steffen Klassert <steffen.klassert@secunet.com>
>>> Acked-by: Vlad Yasevich <vyasevich@gmail.com>
>>> ---
>>>   net/sctp/output.c |   14 +++++++++++++-
>>>   1 file changed, 13 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/net/sctp/output.c b/net/sctp/output.c
>>> index 0ac3a65..6de6402 100644
>>> --- a/net/sctp/output.c
>>> +++ b/net/sctp/output.c
>>> @@ -372,6 +372,16 @@ static void sctp_packet_set_owner_w(struct
>sk_buff *skb, struct sock *sk)
>>>       atomic_inc(&sk->sk_wmem_alloc);
>>>   }
>>>
>>> +static int is_xfrm_armed(struct dst_entry *dst)
>>> +{
>>> +#ifdef CONFIG_XFRM
>>> +    /* If dst->xfrm is valid, this skb needs to be transformed */
>>> +    return dst->xfrm != NULL;
>>> +#else
>>> +    return 0;
>>> +#endif
>>> +}
>>
>> Instead of putting this into SCTP code, isn't the above rather a
>candidate for
>> include/net/xfrm.h, e.g. as ... bool xfrm_is_armed(...) ?
>
>Should be in such style in terms of its name, but this is truly SCTP
>specific in this scenario.
>No one elsewhere barely need this as far as I can tell...

It almost begs for a dst_xfrm() function that returns NULL or dst->xfrm.
That can live in dst code.

-vlad

-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity.

^ permalink raw reply

* [net-next REPOST] 8390 ei_debug : Reenable the use of debugging in 8390 based chips
From: Matthew Whitehead @ 2013-10-15  2:46 UTC (permalink / raw)
  To: netdev; +Cc: Matthew Whitehead

Ethernet boards based on the 8390 chip had an '#ifdef notdef' disabling the
use of the debug variable ei_debug. Reenable it for those of us who still
occasionally use it.

Also handle the case of the 'ne' driver which uses 8390p.o rather than
8390.o. In that case ei_debug is aliased to eip_debug so it doesn't clash
with the previously exported ei_debug.

Signed-off-by: Matthew Whitehead <tedheadster@gmail.com>
---
 drivers/net/ethernet/8390/8390.h     |    5 ++++-
 drivers/net/ethernet/8390/8390p.c    |    1 +
 drivers/net/ethernet/8390/axnet_cs.c |    5 -----
 drivers/net/ethernet/8390/lib8390.c  |    8 ++++++--
 drivers/net/ethernet/8390/ne.c       |    1 +
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h
index 2923c51..e6c547d 100644
--- a/drivers/net/ethernet/8390/8390.h
+++ b/drivers/net/ethernet/8390/8390.h
@@ -21,7 +21,10 @@ struct e8390_pkt_hdr {
   unsigned short count; /* header + packet length in bytes */
 };
 
-#ifdef notdef
+#if (defined EI_DEBUG && defined EIP_DEBUG)
+#define ei_debug eip_debug
+extern int eip_debug;
+#elif (defined EI_DEBUG && ! defined EIP_DEBUG)
 extern int ei_debug;
 #else
 #define ei_debug 1
diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c
index e8fc2e8..0a27dea 100644
--- a/drivers/net/ethernet/8390/8390p.c
+++ b/drivers/net/ethernet/8390/8390p.c
@@ -7,6 +7,7 @@ static const char version[] =
 #define ei_outb(_v, _p)	outb(_v, _p)
 #define ei_inb_p(_p)	inb_p(_p)
 #define ei_outb_p(_v, _p) outb_p(_v, _p)
+#define EIP_DEBUG 1
 
 #include "lib8390.c"
 
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index d801c141..581560c 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -810,11 +810,6 @@ module_pcmcia_driver(axnet_cs_driver);
 #define ei_block_input (ei_local->block_input)
 #define ei_get_8390_hdr (ei_local->get_8390_hdr)
 
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef ei_debug
-int ei_debug = 1;
-#endif
-
 /* Index to functions. */
 static void ei_tx_intr(struct net_device *dev);
 static void ei_tx_err(struct net_device *dev);
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index b329f5c..889d1fd 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -100,8 +100,12 @@
 #define ei_get_8390_hdr (ei_local->get_8390_hdr)
 
 /* use 0 for production, 1 for verification, >2 for debug */
-#ifndef ei_debug
-int ei_debug = 1;
+#if (defined EI_DEBUG && defined EIP_DEBUG)
+int eip_debug = EI_DEBUG;
+EXPORT_SYMBOL(eip_debug);
+#elif (defined EI_DEBUG && ! defined EIP_DEBUG)
+int ei_debug = EI_DEBUG;
+EXPORT_SYMBOL(ei_debug);
 #endif
 
 /* Index to functions. */
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index b2e8405..4136b31 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -54,6 +54,7 @@ static const char version2[] =
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
+#define EIP_DEBUG 1
 
 #include "8390.h"
 
-- 
1.7.2.5

^ permalink raw reply related

* Re: DomU's network interface will hung when Dom0 running 32bit
From: jianhai luan @ 2013-10-15  2:44 UTC (permalink / raw)
  To: Wei Liu; +Cc: Ian Campbell, xen-devel, netdev
In-Reply-To: <20131014111958.GE11739@zion.uk.xensource.com>


On 2013-10-14 19:19, Wei Liu wrote:
> On Sat, Oct 12, 2013 at 04:53:18PM +0800, jianhai luan wrote:
>> Hi Ian,
>>    I meet the DomU's network interface hung issue recently, and have
>> been working on the issue from that time. I find that DomU's network
>> interface, which send lesser package, will hung if Dom0 running
>> 32bit and DomU's up-time is very long.  I think that one jiffies
>> overflow bug exist in the function tx_credit_exceeded().
>>    I know the inline function time_after_eq(a,b) will process jiffies
>> overflow, but the function have one limit a should little that (b +
>> MAX_SIGNAL_LONG). If a large than the value, time_after_eq will
>> return false. The MAX_SINGNAL_LONG should be 0x7fffffff at 32-bit
>> machine.
>>    If DomU's network interface send lesser package (<0.5k/s if
>> jiffies=250 and credit_bytes=ULONG_MAX), jiffies will beyond out
>> (credit_timeout.expires + MAX_SIGNAL_LONG) and time_after_eq(now,
>> next_credit) will failure (should be true). So one timer which will
>> not be trigger in short time, and later process will be aborted when
>> timer_pending(&vif->credit_timeout) is true. The result will be
>> DomU's network interface will be hung in long time (> 40days).
>>    Please think about the below scenario:
>>    Condition:
>>      Dom0 running 32-bit and HZ = 1000
>>      vif->credit_timeout->expire = 0xffffffff, vif->remaining_credit
>> = 0xffffffff, vif->credit_usec=0 jiffies=0
>>      vif receive lesser package (DomU send lesser package). If the
>> value is litter than 2K/s, consume 4G(0xffffffff) will need 582.55
>> hours. jiffies will large than 0x7ffffff. we guess jiffies =
>> 0x800000ff, time_after_eq(0x800000ff, 0xffffffff) will failure, and
>> one time which expire is 0xfffffff will be pended into system. So
>> the interface will hung until jiffies recount 0xffffffff (that will
>> need very long time).
> If I'm not mistaken you meant time_after_eq(now, next_credit) in
> netback. How does next_credit become 0xffffffff?

I only assumed 0xffffffff as an example; the exact value of next_credit
isn't the point. If the delta between now and next_credit is larger than
LONG_MAX, time_after_eq() will give the wrong answer.
>
> Wei.
>
>>    If some error exist in above explain, please help me point it out.
>>
>> Thanks,
>> Jason

^ permalink raw reply

* [PATCH 3/3] ipvs: improved SH fallback strategy
From: Simon Horman @ 2013-10-15  2:01 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov, Alexander Frolkin, Simon Horman
In-Reply-To: <1381802507-7934-1-git-send-email-horms@verge.net.au>

From: Alexander Frolkin <avf@eldamar.org.uk>

Improve the SH fallback realserver selection strategy.

With sh and sh-fallback, if a realserver is down, this attempts to
distribute the traffic that would have gone to that server evenly
among the remaining servers.

Signed-off-by: Alexander Frolkin <avf@eldamar.org.uk>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_sh.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 3588fae..cc65b2f 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
 }
 
 
-/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ *
+ * The fallback strategy loops around the table starting from a "random"
+ * point (in fact, it is chosen to be the original hash value to make the
+ * algorithm deterministic) to find a new server.
+ */
 static inline struct ip_vs_dest *
 ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
 		      const union nf_inet_addr *addr, __be16 port)
 {
-	unsigned int offset;
-	unsigned int hash;
+	unsigned int offset, roffset;
+	unsigned int hash, ihash;
 	struct ip_vs_dest *dest;
 
+	/* first try the dest it's supposed to go to */
+	ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+	dest = rcu_dereference(s->buckets[ihash].dest);
+	if (!dest)
+		return NULL;
+	if (!is_unavailable(dest))
+		return dest;
+
+	IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
+		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+
+	/* if the original dest is unavailable, loop around the table
+	 * starting from ihash to find a new dest
+	 */
 	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
-		hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
+		roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE;
+		hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset);
 		dest = rcu_dereference(s->buckets[hash].dest);
 		if (!dest)
 			break;
-		if (is_unavailable(dest))
-			IP_VS_DBG_BUF(6, "SH: selected unavailable server "
-				      "%s:%d (offset %d)",
-				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
-				      ntohs(dest->port), offset);
-		else
+		if (!is_unavailable(dest))
 			return dest;
+		IP_VS_DBG_BUF(6, "SH: selected unavailable "
+			      "server %s:%d (offset %d), reselecting",
+			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+			      ntohs(dest->port), roffset);
 	}
 
 	return NULL;
-- 
1.8.4

^ permalink raw reply related

* [PATCH 1/3] ipvs: fix the IPVS_CMD_ATTR_MAX definition
From: Simon Horman @ 2013-10-15  2:01 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov, Simon Horman
In-Reply-To: <1381802507-7934-1-git-send-email-horms@verge.net.au>

From: Julian Anastasov <ja@ssi.bg>

IPVS_CMD_ATTR_MAX was defined from __IPVS_SVC_ATTR_MAX instead of __IPVS_CMD_ATTR_MAX, so it was wrong (bigger), but the problem is harmless.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/uapi/linux/ip_vs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index 2945822..fbcffe8 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -334,7 +334,7 @@ enum {
 	__IPVS_CMD_ATTR_MAX,
 };
 
-#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
+#define IPVS_CMD_ATTR_MAX (__IPVS_CMD_ATTR_MAX - 1)
 
 /*
  * Attributes used to describe a service
-- 
1.8.4

^ permalink raw reply related

* [PATCH 2/3] ipvs: avoid rcu_barrier during netns cleanup
From: Simon Horman @ 2013-10-15  2:01 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov, Simon Horman
In-Reply-To: <1381802507-7934-1-git-send-email-horms@verge.net.au>

From: Julian Anastasov <ja@ssi.bg>

commit 578bc3ef1e473a ("ipvs: reorganize dest trash") added
rcu_barrier() on cleanup to wait for dest users and schedulers
like LBLC and LBLCR to put their last dest reference.
Using rcu_barrier() with many namespaces is problematic.

Trying to fix it by freeing dest with kfree_rcu is not
a solution, RCU callbacks can run in parallel and execution
order is random.

Fix it by creating new function ip_vs_dest_put_and_free()
which is heavier than ip_vs_dest_put(). We will use it just
for schedulers like LBLC, LBLCR that can delay their dest
release.

By default, a dest's refcnt is above 0 while it is present in a
service, and it is 0 once it is deleted but still in the trash list.
Change the dest trash code to use ip_vs_dest_put_and_free(),
so that refcnt -1 can be used for freeing. As a result,
such checks remain in the slow path and the rcu_barrier() from
netns cleanup can be removed.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h              | 6 ++++++
 net/netfilter/ipvs/ip_vs_ctl.c   | 6 +-----
 net/netfilter/ipvs/ip_vs_lblc.c  | 2 +-
 net/netfilter/ipvs/ip_vs_lblcr.c | 2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 1c2e1b9..cd7275f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1442,6 +1442,12 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
 	atomic_dec(&dest->refcnt);
 }
 
+static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
+{
+	if (atomic_dec_return(&dest->refcnt) < 0)
+		kfree(dest);
+}
+
 /*
  *      IPVS sync daemon data and function prototypes
  *      (from ip_vs_sync.c)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a3df9bd..62786a4 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
 	__ip_vs_dst_cache_reset(dest);
 	__ip_vs_svc_put(svc, false);
 	free_percpu(dest->stats.cpustats);
-	kfree(dest);
+	ip_vs_dest_put_and_free(dest);
 }
 
 /*
@@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	/* Some dest can be in grace period even before cleanup, we have to
-	 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
-	 */
-	rcu_barrier();
 	ip_vs_trash_cleanup(net);
 	ip_vs_stop_estimator(net, &ipvs->tot_stats);
 	ip_vs_control_net_cleanup_sysctl(net);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index eff13c9..ca056a3 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head)
 						   struct ip_vs_lblc_entry,
 						   rcu_head);
 
-	ip_vs_dest_put(en->dest);
+	ip_vs_dest_put_and_free(en->dest);
 	kfree(en);
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0b85500..3f21a2f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
 	struct ip_vs_dest_set_elem *e;
 
 	e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
-	ip_vs_dest_put(e->dest);
+	ip_vs_dest_put_and_free(e->dest);
 	kfree(e);
 }
 
-- 
1.8.4


^ permalink raw reply related

* [GIT PULL] IPVS updates for v3.13
From: Simon Horman @ 2013-10-15  2:01 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov, Simon Horman

Hi Pablo,

please consider the following fixes for IPVS for v3.13.

This pull request is based on nf-next.


The following changes since commit 58308451e91974267e1f4a618346055342019e02:

  Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next (2013-10-10 15:29:44 -0400)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git tags/ipvs-for-v3.13

for you to fetch changes up to 1255ce5f10dbb4646c8d43b8d59faab48ae4a6b2:

  ipvs: improved SH fallback strategy (2013-10-15 10:54:50 +0900)

----------------------------------------------------------------
IPVS updates for v3.13

* Improvements to SH fallback strategy
* Avoid rcu_barrier during netns cleanup
* Fix the IPVS_CMD_ATTR_MAX definition

----------------------------------------------------------------
Alexander Frolkin (1):
      ipvs: improved SH fallback strategy

Julian Anastasov (2):
      ipvs: fix the IPVS_CMD_ATTR_MAX definition
      ipvs: avoid rcu_barrier during netns cleanup

 include/net/ip_vs.h              |  6 ++++++
 include/uapi/linux/ip_vs.h       |  2 +-
 net/netfilter/ipvs/ip_vs_ctl.c   |  6 +-----
 net/netfilter/ipvs/ip_vs_lblc.c  |  2 +-
 net/netfilter/ipvs/ip_vs_lblcr.c |  2 +-
 net/netfilter/ipvs/ip_vs_sh.c    | 39 +++++++++++++++++++++++++++++----------
 6 files changed, 39 insertions(+), 18 deletions(-)

^ permalink raw reply

* Re: [PATCHv2] ipvs: improved SH fallback strategy
From: Simon Horman @ 2013-10-15  1:55 UTC (permalink / raw)
  To: Julian Anastasov
  Cc: Alexander Frolkin, Sergei Shtylyov, lvs-devel, Wensong Zhang,
	netdev, linux-kernel
In-Reply-To: <alpine.LFD.2.00.1309272218180.1725@ja.ssi.bg>

On Fri, Sep 27, 2013 at 10:20:42PM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Fri, 27 Sep 2013, Alexander Frolkin wrote:
> 
> > Improve the SH fallback realserver selection strategy.
> > 
> > With sh and sh-fallback, if a realserver is down, this attempts to
> > distribute the traffic that would have gone to that server evenly
> > among the remaining servers.
> > 
> > Signed-off-by: Alexander Frolkin <avf@eldamar.org.uk>
> 
> 	Thanks! Looks good to me.
> 
> Acked-by: Julian Anastasov <ja@ssi.bg>

Sorry for letting this one slip.
I have queued it up.

^ permalink raw reply

* Re: [PATCH net-next] sctp: Namespacify checksum_disable
From: Fan Du @ 2013-10-15  1:38 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: nhorman, davem, netdev
In-Reply-To: <1170c5bd-54e0-4051-a280-ff6538a47614@email.android.com>



On 2013年10月14日 22:08, Vlad Yasevich wrote:
>
> Fan Du<fan.du@windriver.com>  wrote:
>
>> >SCTP CRC32-C checksum computing and verifying should be
>> >namespace-sensible,
>> >as each, e.g. tenant instance might need different checksum
>> >configuration for
>> >its requirement. So this patch enhance SCTP with this feature.
>> >
>> >Signed-off-by: Fan Du<fan.du@windriver.com>
> NACK.  We don't want that setting to be sysctl configurable.  It is only useful in very limited circumstances and is not really for production/everyday use.
>
> In fact, I am going to send in a patch that makes this module parameter read only in /sys.

Thanks for the background explanation, Vlad.

-- 
浮沉随浪只记今朝笑

--fan

^ permalink raw reply

* Re: [PATCH net 1/2] virtio-net: don't respond to cpu hotplug notifier if we're not ready
From: Wanlong Gao @ 2013-10-15  0:18 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, netdev, linux-kernel, virtualization
In-Reply-To: <1381744595-26881-1-git-send-email-jasowang@redhat.com>

On 10/14/2013 05:56 PM, Jason Wang wrote:
> We're trying to re-configure the affinity unconditionally in the cpu hotplug
> callback. This may lead to issues when resuming from s3/s4 since
> 
> - virt queues haven't been allocated at that time.
> - it's unnecessary since thaw method will re-configure the affinity.
> 
> Fix this issue by checking config_enable and doing nothing if we're not ready.
> 
> The bug was introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17
> (virtio-net: reset virtqueue affinity when doing cpu hotplug).
> 
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Thank you.

Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>


> ---
> The patch is need for 3.8 and above.
> ---
>  drivers/net/virtio_net.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index defec2b..c4bc1cc 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1116,6 +1116,11 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
>  {
>  	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
>  
> +	mutex_lock(&vi->config_lock);
> +
> +	if (!vi->config_enable)
> +		goto done;
> +
>  	switch(action & ~CPU_TASKS_FROZEN) {
>  	case CPU_ONLINE:
>  	case CPU_DOWN_FAILED:
> @@ -1128,6 +1133,9 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
>  	default:
>  		break;
>  	}
> +
> +done:
> +	mutex_unlock(&vi->config_lock);
>  	return NOTIFY_OK;
>  }
>  
> 

^ permalink raw reply

* [PATCH RFC 5/5] net: macb: Adjust tx_clk when link speed changes
From: Soren Brinkmann @ 2013-10-14 23:59 UTC (permalink / raw)
  To: Nicolas Ferre; +Cc: netdev, linux-kernel, Michal Simek, Soren Brinkmann
In-Reply-To: <1381795140-10792-1-git-send-email-soren.brinkmann@xilinx.com>

Adjust the ethernet clock according to the negotiated link speed.

Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
---
 drivers/net/ethernet/cadence/macb.c | 66 +++++++++++++++++++++++++++++++++++++
 drivers/net/ethernet/cadence/macb.h |  1 +
 2 files changed, 67 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 603844b1d483..beb9fa863811 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -204,6 +204,49 @@ static int macb_mdio_reset(struct mii_bus *bus)
 	return 0;
 }
 
+/**
+ * macb_set_tx_clk() - Set the tx clock rate to match a link speed
+ * @clk:	Pointer to the clock to change
+ * @speed:	Link speed (SPEED_10, SPEED_100 or SPEED_1000) selecting the rate
+ * @dev:	Pointer to the struct net_device
+ */
+static void macb_set_tx_clk(struct clk *clk, int speed, struct net_device *dev)
+{
+	long ferr;
+	long rate;
+	long rate_rounded;
+
+	switch (speed) {
+	case SPEED_10:
+		rate = 2500000;
+		break;
+	case SPEED_100:
+		rate = 25000000;
+		break;
+	case SPEED_1000:
+		rate = 125000000;
+		break;
+	default:
+		break;
+	}
+
+	rate_rounded = clk_round_rate(clk, rate);
+	if (rate_rounded < 0)
+		return;
+
+	/* RGMII allows 50 ppm frequency error. Test and warn if this limit
+	 * is not met.
+	 */
+	ferr = abs(rate_rounded - rate);
+	ferr = DIV_ROUND_UP(ferr, rate / 100000);
+	if (ferr > 5)
+		netdev_warn(dev, "unable to generate target frequency: %ld Hz\n",
+				rate);
+
+	if (clk_set_rate(clk, rate_rounded))
+		netdev_err(dev, "adjusting tx_clk failed.\n");
+}
+
 static void macb_handle_link_change(struct net_device *dev)
 {
 	struct macb *bp = netdev_priv(dev);
@@ -251,6 +294,9 @@ static void macb_handle_link_change(struct net_device *dev)
 
 	spin_unlock_irqrestore(&bp->lock, flags);
 
+	if (!IS_ERR(bp->tx_clk))
+		macb_set_tx_clk(bp->tx_clk, phydev->speed, dev);
+
 	if (status_change) {
 		if (phydev->link) {
 			netif_carrier_on(dev);
@@ -1805,6 +1851,8 @@ static int __init macb_probe(struct platform_device *pdev)
 		goto err_out_free_dev;
 	}
 
+	bp->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+
 	err = clk_prepare_enable(bp->pclk);
 	if (err) {
 		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
@@ -1817,6 +1865,15 @@ static int __init macb_probe(struct platform_device *pdev)
 		goto err_out_disable_pclk;
 	}
 
+	if (!IS_ERR(bp->tx_clk)) {
+		err = clk_prepare_enable(bp->tx_clk);
+		if (err) {
+			dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n",
+					err);
+			goto err_out_disable_hclk;
+		}
+	}
+
 	bp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
 	if (!bp->regs) {
 		dev_err(&pdev->dev, "failed to map registers, aborting.\n");
@@ -1917,6 +1974,9 @@ static int __init macb_probe(struct platform_device *pdev)
 err_out_unregister_netdev:
 	unregister_netdev(dev);
 err_out_disable_clocks:
+	if (!IS_ERR(bp->tx_clk))
+		clk_disable_unprepare(bp->tx_clk);
+err_out_disable_hclk:
 	clk_disable_unprepare(bp->hclk);
 err_out_disable_pclk:
 	clk_disable_unprepare(bp->pclk);
@@ -1941,6 +2001,8 @@ static int __exit macb_remove(struct platform_device *pdev)
 		kfree(bp->mii_bus->irq);
 		mdiobus_free(bp->mii_bus);
 		unregister_netdev(dev);
+		if (!IS_ERR(bp->tx_clk))
+			clk_disable_unprepare(bp->tx_clk);
 		clk_disable_unprepare(bp->hclk);
 		clk_disable_unprepare(bp->pclk);
 		free_netdev(dev);
@@ -1959,6 +2021,8 @@ static int macb_suspend(struct device *dev)
 	netif_carrier_off(netdev);
 	netif_device_detach(netdev);
 
+	if (!IS_ERR(bp->tx_clk))
+		clk_disable_unprepare(bp->tx_clk);
 	clk_disable_unprepare(bp->hclk);
 	clk_disable_unprepare(bp->pclk);
 
@@ -1973,6 +2037,8 @@ static int macb_resume(struct device *dev)
 
 	clk_prepare_enable(bp->pclk);
 	clk_prepare_enable(bp->hclk);
+	if (!IS_ERR(bp->tx_clk))
+		clk_prepare_enable(bp->tx_clk);
 
 	netif_device_attach(netdev);
 
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index f4076155bed7..51c02442160a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -572,6 +572,7 @@ struct macb {
 	struct platform_device	*pdev;
 	struct clk		*pclk;
 	struct clk		*hclk;
+	struct clk		*tx_clk;
 	struct net_device	*dev;
 	struct napi_struct	napi;
 	struct work_struct	tx_error_task;
-- 
1.8.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox