Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v1 1/3] net: fec: add Wake-on-LAN support
From: Fugang Duan @ 2014-12-24  9:30 UTC (permalink / raw)
  To: shawn.guo, davem; +Cc: netdev, bhutchings, stephen, b38611
In-Reply-To: <1419413441-3406-1-git-send-email-b38611@freescale.com>

Support for Wake-on-LAN using Magic Packet. ENET IP supports sleep mode
in low power status, when system enter suspend status, Magic packet can
wake up system even if all SOC clocks are gate. The patch doing below things:
- flagging the device as a wakeup source for the system, as well as
  its Wake-on-LAN interrupt
- prepare the hardware for entering WoL mode
- add standard ethtool WOL interface
- enable the ENET interrupt to wake us

Tested on i.MX6q/dl sabresd, sabreauto boards, i.MX6SX arm2 boards.

Signed-off-by: Fugang Duan <B38611@freescale.com>
---
 Documentation/devicetree/bindings/net/fsl-fec.txt |    2 +
 drivers/net/ethernet/freescale/fec.h              |    2 +
 drivers/net/ethernet/freescale/fec_main.c         |  104 +++++++++++++++++++--
 include/linux/fec.h                               |    1 +
 4 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index 0c8775c..a9eb611 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -22,6 +22,8 @@ Optional properties:
 - fsl,num-rx-queues : The property is valid for enet-avb IP, which supports
   hw multi queues. Should specify the rx queue number, otherwise set rx queue
   number to 1.
+- fsl,magic-packet : If present, indicates that the hardware supports waking
+  up via magic packet.
 
 Optional subnodes:
 - mdio : specifies the mdio bus in the FEC, used as a container for phy nodes
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 469691a..e51ab5d 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -356,6 +356,7 @@ struct bufdesc_ex {
 #define FEC_ENET_RXB    ((uint)0x01000000)      /* A buffer was received */
 #define FEC_ENET_MII    ((uint)0x00800000)      /* MII interrupt */
 #define FEC_ENET_EBERR  ((uint)0x00400000)      /* SDMA bus error */
+#define FEC_ENET_WAKEUP	((uint)0x00020000)	/* Wakeup request */
 #define FEC_ENET_TXF	(FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2)
 #define FEC_ENET_RXF	(FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2)
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
@@ -511,6 +512,7 @@ struct fec_enet_private {
 	int	irq[FEC_IRQ_NUM];
 	bool	bufdesc_ex;
 	int	pause_flag;
+	int	wol_flag;
 	u32	quirks;
 
 	struct	napi_struct napi;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 5c4a8bd..615b51d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -187,6 +187,9 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_MMFR_RA(v)		((v & 0x1f) << 18)
 #define FEC_MMFR_TA		(2 << 16)
 #define FEC_MMFR_DATA(v)	(v & 0xffff)
+/* FEC ECR bits definition */
+#define FEC_ECR_MAGICEN		(1 << 2)
+#define FEC_ECR_SLEEP		(1 << 3)
 
 #define FEC_MII_TIMEOUT		30000 /* us */
 
@@ -195,6 +198,9 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 
 #define FEC_PAUSE_FLAG_AUTONEG	0x1
 #define FEC_PAUSE_FLAG_ENABLE	0x2
+#define FEC_WOL_HAS_MAGIC_PACKET	(0x1 << 0)
+#define FEC_WOL_FLAG_ENABLE		(0x1 << 1)
+#define FEC_WOL_FLAG_SLEEP_ON		(0x1 << 2)
 
 #define COPYBREAK_DEFAULT	256
 
@@ -1089,7 +1095,9 @@ static void
 fec_stop(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
 	u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8);
+	u32 val;
 
 	/* We cannot expect a graceful transmit stop without link !!! */
 	if (fep->link) {
@@ -1103,17 +1111,28 @@ fec_stop(struct net_device *ndev)
 	 * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
 	 * instead of reset MAC itself.
 	 */
-	if (fep->quirks & FEC_QUIRK_HAS_AVB) {
-		writel(0, fep->hwp + FEC_ECNTRL);
+	if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
+		if (fep->quirks & FEC_QUIRK_HAS_AVB) {
+			writel(0, fep->hwp + FEC_ECNTRL);
+		} else {
+			writel(1, fep->hwp + FEC_ECNTRL);
+			udelay(10);
+		}
+		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	} else {
-		writel(1, fep->hwp + FEC_ECNTRL);
-		udelay(10);
+		writel(FEC_DEFAULT_IMASK | FEC_ENET_WAKEUP, fep->hwp + FEC_IMASK);
+		val = readl(fep->hwp + FEC_ECNTRL);
+		val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP);
+		writel(val, fep->hwp + FEC_ECNTRL);
+
+		if (pdata && pdata->sleep_mode_enable)
+			pdata->sleep_mode_enable(true);
 	}
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
-	writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 
 	/* We have to keep ENET enabled to have MII interrupt stay working */
-	if (fep->quirks & FEC_QUIRK_ENET_MAC) {
+	if (fep->quirks & FEC_QUIRK_ENET_MAC &&
+		!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
 		writel(2, fep->hwp + FEC_ECNTRL);
 		writel(rmii_mode, fep->hwp + FEC_R_CNTRL);
 	}
@@ -2427,6 +2446,44 @@ static int fec_enet_set_tunable(struct net_device *netdev,
 	return ret;
 }
 
+static void
+fec_enet_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+
+	if (fep->wol_flag & FEC_WOL_HAS_MAGIC_PACKET) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = fep->wol_flag & FEC_WOL_FLAG_ENABLE ? WAKE_MAGIC : 0;
+	} else {
+		wol->supported = wol->wolopts = 0;
+	}
+}
+
+static int
+fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+
+	if (!(fep->wol_flag & FEC_WOL_HAS_MAGIC_PACKET))
+		return -EINVAL;
+
+	if (wol->wolopts & ~WAKE_MAGIC)
+		return -EINVAL;
+
+	device_set_wakeup_enable(&ndev->dev, wol->wolopts & WAKE_MAGIC);
+	if (device_may_wakeup(&ndev->dev)) {
+		fep->wol_flag |= FEC_WOL_FLAG_ENABLE;
+		if (fep->irq[0] > 0)
+			enable_irq_wake(fep->irq[0]);
+	} else {
+		fep->wol_flag &= (~FEC_WOL_FLAG_ENABLE);
+		if (fep->irq[0] > 0)
+			disable_irq_wake(fep->irq[0]);
+	}
+
+	return 0;
+}
+
 static const struct ethtool_ops fec_enet_ethtool_ops = {
 	.get_settings		= fec_enet_get_settings,
 	.set_settings		= fec_enet_set_settings,
@@ -2445,6 +2502,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
 	.get_ts_info		= fec_enet_get_ts_info,
 	.get_tunable		= fec_enet_get_tunable,
 	.set_tunable		= fec_enet_set_tunable,
+	.get_wol		= fec_enet_get_wol,
+	.set_wol		= fec_enet_set_wol,
 };
 
 static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
@@ -2705,6 +2764,9 @@ fec_enet_open(struct net_device *ndev)
 	phy_start(fep->phy_dev);
 	netif_tx_start_all_queues(ndev);
 
+	device_set_wakeup_enable(&ndev->dev, fep->wol_flag &
+				 FEC_WOL_FLAG_ENABLE);
+
 	return 0;
 
 err_enet_mii_probe:
@@ -3153,6 +3215,9 @@ fec_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ndev);
 
+	if (of_get_property(np, "fsl,magic-packet", NULL))
+		fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET;
+
 	phy_node = of_parse_phandle(np, "phy-handle", 0);
 	if (!phy_node && of_phy_is_fixed_link(np)) {
 		ret = of_phy_register_fixed_link(np);
@@ -3247,6 +3312,8 @@ fec_probe(struct platform_device *pdev)
 				       0, pdev->name, ndev);
 		if (ret)
 			goto failed_irq;
+
+		fep->irq[i] = irq;
 	}
 
 	init_completion(&fep->mdio_done);
@@ -3263,6 +3330,9 @@ fec_probe(struct platform_device *pdev)
 	if (ret)
 		goto failed_register;
 
+	device_init_wakeup(&ndev->dev, fep->wol_flag &
+			   FEC_WOL_HAS_MAGIC_PACKET);
+
 	if (fep->bufdesc_ex && fep->ptp_clock)
 		netdev_info(ndev, "registered PHC device %d\n", fep->dev_id);
 
@@ -3316,6 +3386,8 @@ static int __maybe_unused fec_suspend(struct device *dev)
 
 	rtnl_lock();
 	if (netif_running(ndev)) {
+		if (fep->wol_flag & FEC_WOL_FLAG_ENABLE)
+			fep->wol_flag |= FEC_WOL_FLAG_SLEEP_ON;
 		phy_stop(fep->phy_dev);
 		napi_disable(&fep->napi);
 		netif_tx_lock_bh(ndev);
@@ -3323,11 +3395,12 @@ static int __maybe_unused fec_suspend(struct device *dev)
 		netif_tx_unlock_bh(ndev);
 		fec_stop(ndev);
 		fec_enet_clk_enable(ndev, false);
-		pinctrl_pm_select_sleep_state(&fep->pdev->dev);
+		if (!(fep->wol_flag & FEC_WOL_FLAG_ENABLE))
+			pinctrl_pm_select_sleep_state(&fep->pdev->dev);
 	}
 	rtnl_unlock();
 
-	if (fep->reg_phy)
+	if (fep->reg_phy && !(fep->wol_flag & FEC_WOL_FLAG_ENABLE))
 		regulator_disable(fep->reg_phy);
 
 	/* SOC supply clock to phy, when clock is disabled, phy link down
@@ -3343,9 +3416,11 @@ static int __maybe_unused fec_resume(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
 	int ret;
+	int val;
 
-	if (fep->reg_phy) {
+	if (fep->reg_phy && !(fep->wol_flag & FEC_WOL_FLAG_ENABLE)) {
 		ret = regulator_enable(fep->reg_phy);
 		if (ret)
 			return ret;
@@ -3353,12 +3428,21 @@ static int __maybe_unused fec_resume(struct device *dev)
 
 	rtnl_lock();
 	if (netif_running(ndev)) {
-		pinctrl_pm_select_default_state(&fep->pdev->dev);
 		ret = fec_enet_clk_enable(ndev, true);
 		if (ret) {
 			rtnl_unlock();
 			goto failed_clk;
 		}
+		if (fep->wol_flag & FEC_WOL_FLAG_ENABLE) {
+			if (pdata && pdata->sleep_mode_enable)
+				pdata->sleep_mode_enable(false);
+			val = readl(fep->hwp + FEC_ECNTRL);
+			val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP);
+			writel(val, fep->hwp + FEC_ECNTRL);
+			fep->wol_flag &= ~FEC_WOL_FLAG_SLEEP_ON;
+		} else {
+			pinctrl_pm_select_default_state(&fep->pdev->dev);
+		}
 		fec_restart(ndev);
 		netif_tx_lock_bh(ndev);
 		netif_device_attach(ndev);
diff --git a/include/linux/fec.h b/include/linux/fec.h
index bcff455..1454a50 100644
--- a/include/linux/fec.h
+++ b/include/linux/fec.h
@@ -19,6 +19,7 @@
 struct fec_platform_data {
 	phy_interface_t phy;
 	unsigned char mac[ETH_ALEN];
+	void (*sleep_mode_enable)(int enabled);
 };
 
 #endif
-- 
1.7.8

^ permalink raw reply related

* Re: [PATCH net-next] Allow to set net namespace for wireless device via RTM_LINK
From: Vadim Kochan @ 2014-12-24  9:48 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Marcel Holtmann, netdev@vger.kernel.org, linux-wireless
In-Reply-To: <1413802344.10246.20.camel@jlt4.sipsolutions.net>

On Mon, Oct 20, 2014 at 1:52 PM, Johannes Berg
<johannes@sipsolutions.net> wrote:
> On Mon, 2014-10-20 at 12:46 +0200, Marcel Holtmann wrote:
>
>> Maybe relaxing the check and allow ip link to move a wireless netdev
>> into a namespace (and having the wiphy follow) could be allowed if it
>> is the only netdev or the original wlan0 that each wiphy creates. I
>> really do not know if this is worth it, but for some simpler container
>> cases it could be beneficial if RTNL can be used instead of having to
>> go through nl80211.
>
> The thought crossed my mind, but
>
> 1) it's relatively complex, though by no means impossible
> 2) it still moves more than you bargained for, since in theory the wiphy
> could be
>    used to create new interfaces etc.
>
> That said, I'm much more inclined to believe such a patch would be
> worthwhile than the original.
>
> johannes
>

Hi Johannes,

What about the following thoughts:

    1) Set NETIF_F_NETNS_LOCAL for phy wireless device only if there
is at least one virtual interface which was created on it
    2) What about to inherit netns for newer created interfaces from
the phy device ?

Thanks,

^ permalink raw reply

* [PATCH] openvswitch: fix odd_ptr_err.cocci warnings
From: kbuild test robot @ 2014-12-24  6:41 UTC (permalink / raw)
  To: Pravin B Shelar
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
	kbuild-all-JC7UmRfGjtg, linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <201412241437.BpaCtsIE%fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

net/openvswitch/vport-gre.c:188:5-11: inconsistent IS_ERR and PTR_ERR, PTR_ERR on line 189

 PTR_ERR should access the value just tested by IS_ERR

Semantic patch information:
 There can be false positives in the patch case, where it is the call
 IS_ERR that is wrong.

Generated by: scripts/coccinelle/tests/odd_ptr_err.cocci

CC: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---

 vport-gre.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -186,7 +186,7 @@ static int gre_tnl_send(struct vport *vp
 	/* Push Tunnel header. */
 	skb = __build_header(skb, tunnel_hlen);
 	if (IS_ERR(skb)) {
-		err = PTR_ERR(rt);
+		err = PTR_ERR(skb);
 		skb = NULL;
 		goto err_free_rt;
 	}
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

^ permalink raw reply

* [PATCH net v2] net: Generalize ndo_gso_check to ndo_features_check
From: Jesse Gross @ 2014-12-24  6:37 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Tom Herbert, Joe Stringer, Eric Dumazet, Hayes Wang

GSO isn't the only offload feature with restrictions that
potentially can't be expressed with the current features mechanism.
Checksum is another although it's a general issue that could in
theory apply to anything. Even if it may be possible to
implement these restrictions in other ways, it can result in
duplicate code or inefficient per-packet behavior.

This generalizes ndo_gso_check so that drivers can remove any
features that don't make sense for a given packet, similar to
netif_skb_features(). It also converts existing driver
restrictions to the new format, completing the work that was
done to support tunnel protocols since the issues apply to
checksums as well.

By actually removing features from the set that are used to do
offloading, it solves another problem with the existing
interface. In these cases, GSO would run with the original set
of features and not do anything because it appears that
segmentation is not required.

CC: Tom Herbert <therbert@google.com>
CC: Joe Stringer <joestringer@nicira.com>
CC: Eric Dumazet <edumazet@google.com>
CC: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by:  Tom Herbert <therbert@google.com>
Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
---
v2: Rebase
    Drop overzealous use of netdev_intersect_features()
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |  8 ++++---
 drivers/net/ethernet/emulex/benet/be_main.c      |  8 ++++---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   | 10 +++++----
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |  8 ++++---
 include/linux/netdevice.h                        | 20 +++++++++--------
 include/net/vxlan.h                              | 28 ++++++++++++++++++++----
 net/core/dev.c                                   | 23 +++++++++++--------
 7 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 9f5e387..72eef9f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12553,9 +12553,11 @@ static int bnx2x_get_phys_port_id(struct net_device *netdev,
 	return 0;
 }
 
-static bool bnx2x_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t bnx2x_features_check(struct sk_buff *skb,
+					      struct net_device *dev,
+					      netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 
 static const struct net_device_ops bnx2x_netdev_ops = {
@@ -12589,7 +12591,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
 #endif
 	.ndo_get_phys_port_id	= bnx2x_get_phys_port_id,
 	.ndo_set_vf_link_state	= bnx2x_set_vf_link_state,
-	.ndo_gso_check		= bnx2x_gso_check,
+	.ndo_features_check	= bnx2x_features_check,
 };
 
 static int bnx2x_set_coherency_mask(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 1960731..41a0a54 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4459,9 +4459,11 @@ done:
 	adapter->vxlan_port_count--;
 }
 
-static bool be_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t be_features_check(struct sk_buff *skb,
+					   struct net_device *dev,
+					   netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 #endif
 
@@ -4492,7 +4494,7 @@ static const struct net_device_ops be_netdev_ops = {
 #ifdef CONFIG_BE2NET_VXLAN
 	.ndo_add_vxlan_port	= be_add_vxlan_port,
 	.ndo_del_vxlan_port	= be_del_vxlan_port,
-	.ndo_gso_check		= be_gso_check,
+	.ndo_features_check	= be_features_check,
 #endif
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 190cbd9..d0d6dc1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2365,9 +2365,11 @@ static void mlx4_en_del_vxlan_port(struct  net_device *dev,
 	queue_work(priv->mdev->workqueue, &priv->vxlan_del_task);
 }
 
-static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
+						struct net_device *dev,
+						netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 #endif
 
@@ -2400,7 +2402,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 #ifdef CONFIG_MLX4_EN_VXLAN
 	.ndo_add_vxlan_port	= mlx4_en_add_vxlan_port,
 	.ndo_del_vxlan_port	= mlx4_en_del_vxlan_port,
-	.ndo_gso_check		= mlx4_en_gso_check,
+	.ndo_features_check	= mlx4_en_features_check,
 #endif
 };
 
@@ -2434,7 +2436,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
 #ifdef CONFIG_MLX4_EN_VXLAN
 	.ndo_add_vxlan_port	= mlx4_en_add_vxlan_port,
 	.ndo_del_vxlan_port	= mlx4_en_del_vxlan_port,
-	.ndo_gso_check		= mlx4_en_gso_check,
+	.ndo_features_check	= mlx4_en_features_check,
 #endif
 };
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 1aa25b1..9929b97 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -505,9 +505,11 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev,
 	adapter->flags |= QLCNIC_DEL_VXLAN_PORT;
 }
 
-static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev)
+static netdev_features_t qlcnic_features_check(struct sk_buff *skb,
+					       struct net_device *dev,
+					       netdev_features_t features)
 {
-	return vxlan_gso_check(skb);
+	return vxlan_features_check(skb, features);
 }
 #endif
 
@@ -532,7 +534,7 @@ static const struct net_device_ops qlcnic_netdev_ops = {
 #ifdef CONFIG_QLCNIC_VXLAN
 	.ndo_add_vxlan_port	= qlcnic_add_vxlan_port,
 	.ndo_del_vxlan_port	= qlcnic_del_vxlan_port,
-	.ndo_gso_check		= qlcnic_gso_check,
+	.ndo_features_check	= qlcnic_features_check,
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = qlcnic_poll_controller,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c31f74d..679e6e9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1012,12 +1012,15 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
- * bool	(*ndo_gso_check) (struct sk_buff *skb,
- *			  struct net_device *dev);
+ * netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
+ *					    struct net_device *dev
+ *					    netdev_features_t features);
  *	Called by core transmit path to determine if device is capable of
- *	performing GSO on a packet. The device returns true if it is
- *	able to GSO the packet, false otherwise. If the return value is
- *	false the stack will do software GSO.
+ *	performing offload operations on a given packet. This is to give
+ *	the device an opportunity to implement any restrictions that cannot
+ *	be otherwise expressed by feature flags. The check is called with
+ *	the set of features that the stack has calculated and it returns
+ *	those the driver believes to be appropriate.
  *
  * int (*ndo_switch_parent_id_get)(struct net_device *dev,
  *				   struct netdev_phys_item_id *psid);
@@ -1178,8 +1181,9 @@ struct net_device_ops {
 							struct net_device *dev,
 							void *priv);
 	int			(*ndo_get_lock_subclass)(struct net_device *dev);
-	bool			(*ndo_gso_check) (struct sk_buff *skb,
-						  struct net_device *dev);
+	netdev_features_t	(*ndo_features_check) (struct sk_buff *skb,
+						       struct net_device *dev,
+						       netdev_features_t features);
 #ifdef CONFIG_NET_SWITCHDEV
 	int			(*ndo_switch_parent_id_get)(struct net_device *dev,
 							    struct netdev_phys_item_id *psid);
@@ -3611,8 +3615,6 @@ static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb,
 				   netdev_features_t features)
 {
 	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
-		(dev->netdev_ops->ndo_gso_check &&
-		 !dev->netdev_ops->ndo_gso_check(skb, dev)) ||
 		unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
 			 (skb->ip_summed != CHECKSUM_UNNECESSARY)));
 }
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 57cccd0..903461a 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -1,6 +1,9 @@
 #ifndef __NET_VXLAN_H
 #define __NET_VXLAN_H 1
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
@@ -51,16 +54,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
 		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
 
-static inline bool vxlan_gso_check(struct sk_buff *skb)
+static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
+						     netdev_features_t features)
 {
-	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	u8 l4_hdr = 0;
+
+	if (!skb->encapsulation)
+		return features;
+
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		l4_hdr = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_hdr = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return features;;
+	}
+
+	if ((l4_hdr == IPPROTO_UDP) &&
 	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
 	     skb->inner_protocol != htons(ETH_P_TEB) ||
 	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
 	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
-		return false;
+		return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
 
-	return true;
+	return features;
 }
 
 /* IP header + UDP + VXLAN + Ethernet header */
diff --git a/net/core/dev.c b/net/core/dev.c
index bd44e28..cf89f8a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2562,7 +2562,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
-	const struct net_device *dev = skb->dev;
+	struct net_device *dev = skb->dev;
 	netdev_features_t features = dev->features;
 	u16 gso_segs = skb_shinfo(skb)->gso_segs;
 	__be16 protocol = skb->protocol;
@@ -2570,13 +2570,20 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
+	/* If encapsulation offload request, verify we are testing
+	 * hardware encapsulation features instead of standard
+	 * features for the netdev
+	 */
+	if (skb->encapsulation)
+		features &= dev->hw_enc_features;
+
 	if (!vlan_tx_tag_present(skb)) {
 		if (unlikely(protocol == htons(ETH_P_8021Q) ||
 			     protocol == htons(ETH_P_8021AD))) {
 			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 			protocol = veh->h_vlan_encapsulated_proto;
 		} else {
-			return harmonize_features(skb, features);
+			goto finalize;
 		}
 	}
 
@@ -2594,6 +2601,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 						     NETIF_F_HW_VLAN_CTAG_TX |
 						     NETIF_F_HW_VLAN_STAG_TX);
 
+finalize:
+	if (dev->netdev_ops->ndo_features_check)
+		features &= dev->netdev_ops->ndo_features_check(skb, dev,
+								features);
+
 	return harmonize_features(skb, features);
 }
 EXPORT_SYMBOL(netif_skb_features);
@@ -2664,13 +2676,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (unlikely(!skb))
 		goto out_null;
 
-	/* If encapsulation offload request, verify we are testing
-	 * hardware encapsulation features instead of standard
-	 * features for the netdev
-	 */
-	if (skb->encapsulation)
-		features &= dev->hw_enc_features;
-
 	if (netif_needs_gso(dev, skb, features)) {
 		struct sk_buff *segs;
 
-- 
1.9.1

^ permalink raw reply related

* hi I'm confused with Implementation of RPS and RFS.
From: lx @ 2014-12-24  6:34 UTC (permalink / raw)
  To: netdev

hi all:
       I'm read the source code of RPS and RFS, I think the key function 
of RPS and RFS is get_rps_cpu(), and
It's codes is:
###############################################################
3079 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3080                struct rps_dev_flow **rflowp)
3081 {
3082     struct netdev_rx_queue *rxqueue;
3083     struct rps_map *map;
3084     struct rps_dev_flow_table *flow_table;
3085     struct rps_sock_flow_table *sock_flow_table;
3086     int cpu = -1;
3087     u16 tcpu;
3088     u32 hash;
3089
3090     if (skb_rx_queue_recorded(skb)) {
3091         u16 index = skb_get_rx_queue(skb);
3092         if (unlikely(index >= dev->real_num_rx_queues)) {
3093             WARN_ONCE(dev->real_num_rx_queues > 1,
3094                   "%s received packet on queue %u, but number "
3095                   "of RX queues is %u\n",
3096                   dev->name, index, dev->real_num_rx_queues);
3097             goto done;
3098         }
3099         rxqueue = dev->_rx + index;
3100     } else
3101         rxqueue = dev->_rx;
3102
3103     map = rcu_dereference(rxqueue->rps_map);
3104     if (map) {
3105         if (map->len == 1 &&
3106             !rcu_access_pointer(rxqueue->rps_flow_table)) {
3107             tcpu = map->cpus[0];
3108             if (cpu_online(tcpu))
3109                 cpu = tcpu;
3110             goto done;
3111         }
3112     } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3113         goto done;
3114     }
3115
3116     skb_reset_network_header(skb);
3117     hash = skb_get_hash(skb);
3118     if (!hash)
3119         goto done;
3120
3121     flow_table = rcu_dereference(rxqueue->rps_flow_table);
3122     sock_flow_table = rcu_dereference(rps_sock_flow_table);
3123     if (flow_table && sock_flow_table) {
3124         u16 next_cpu;
3125         struct rps_dev_flow *rflow;
3126
3127         rflow = &flow_table->flows[hash & flow_table->mask];
3128         tcpu = rflow->cpu;
3129
3130         next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
3131
3132         /*
3133          * If the desired CPU (where last recvmsg was done) is
3134          * different from current CPU (one in the rx-queue flow
3135          * table entry), switch if one of the following holds:
3136          *   - Current CPU is unset (equal to RPS_NO_CPU).
3137          *   - Current CPU is offline.
3138          *   - The current CPU's queue tail has advanced beyond the
3139          *     last packet that was enqueued using this table entry.
3140          *     This guarantees that all previous packets for the flow
3141          *     have been dequeued, thus preserving in order delivery.
3142          */
3143         if (unlikely(tcpu != next_cpu) &&
3144             (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3145              ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3146               rflow->last_qtail)) >= 0)) {
3147             tcpu = next_cpu;
3148             rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3149         }
3150
3151         if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3152             *rflowp = rflow;
3153             cpu = tcpu;
3154             goto done;
3155         }
3156     }
3157
3158     if (map) {
3159         tcpu = map->cpus[reciprocal_scale(hash, map->len)];
3160         if (cpu_online(tcpu)) {
3161             cpu = tcpu;
3162             goto done;
3163         }
3164     }
3165
3166 done:
3167     return cpu;
3168 }
###############################################################
I know the RPS/RFS get the CPU id by this function.I think RPS can't work,
when RFS is active, beacuse the implementation of RPS is between 3158 
and 3163.
and the codes between 3123 and 3156 are used for RFS.

I think RPS is work just in this two  conditions:
1.  When the packet first arrived this host,  the value of tcpu and 
next_cpu are RPS_NO_CPU.
2. When the destination of packet is not this host, the value of tcpu 
and next_cpu always are RPS_NO_CPU.

My question is: If the RFS is enable, the RPS can't work?

Thank you.

^ permalink raw reply

* [PATCH net-next 2/2] net: gianfar: add missing __iomem annotation
From: Kevin Hao @ 2014-12-24  6:05 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Claudiu Manoil
In-Reply-To: <1419401145-8967-1-git-send-email-haokexin@gmail.com>

Fix the following spare warning:
drivers/net/ethernet/freescale/gianfar.c:3521:60: warning: incorrect type in argument 1 (different address spaces)
drivers/net/ethernet/freescale/gianfar.c:3521:60:    expected unsigned int [noderef] <asn:2>*addr
drivers/net/ethernet/freescale/gianfar.c:3521:60:    got unsigned int [usertype] *rfbptr
drivers/net/ethernet/freescale/gianfar.c:205:16: warning: incorrect type in assignment (different address spaces)
drivers/net/ethernet/freescale/gianfar.c:205:16:    expected unsigned int [usertype] *rfbptr
drivers/net/ethernet/freescale/gianfar.c:205:16:    got unsigned int [noderef] <asn:2>*<noident>
drivers/net/ethernet/freescale/gianfar.c:2918:44: warning: incorrect type in argument 1 (different address spaces)
drivers/net/ethernet/freescale/gianfar.c:2918:44:    expected unsigned int [noderef] <asn:2>*addr
drivers/net/ethernet/freescale/gianfar.c:2918:44:    got unsigned int [usertype] *rfbptr

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
 drivers/net/ethernet/freescale/gianfar.c | 2 +-
 drivers/net/ethernet/freescale/gianfar.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index d2a67ed10a38..e54b1e39f9b4 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -177,7 +177,7 @@ static int gfar_init_bds(struct net_device *ndev)
 	struct gfar_priv_rx_q *rx_queue = NULL;
 	struct txbd8 *txbdp;
 	struct rxbd8 *rxbdp;
-	u32 *rfbptr;
+	u32 __iomem *rfbptr;
 	int i, j;
 	dma_addr_t bufaddr;
 
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index b581b8823a2a..9e1802400c23 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1039,7 +1039,7 @@ struct gfar_priv_rx_q {
 	/* RX Coalescing values */
 	unsigned char rxcoalescing;
 	unsigned long rxic;
-	u32 *rfbptr;
+	u32 __iomem *rfbptr;
 };
 
 enum gfar_irqinfo_id {
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 1/2] net: gianfar: mark the local functions static
From: Kevin Hao @ 2014-12-24  6:05 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Claudiu Manoil

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
 drivers/net/ethernet/freescale/gianfar.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 5645342f5b28..d2a67ed10a38 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -116,7 +116,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static void gfar_reset_task(struct work_struct *work);
 static void gfar_timeout(struct net_device *dev);
 static int gfar_close(struct net_device *dev);
-struct sk_buff *gfar_new_skb(struct net_device *dev, dma_addr_t *bufaddr);
+static struct sk_buff *gfar_new_skb(struct net_device *dev,
+				    dma_addr_t *bufaddr);
 static int gfar_set_mac_address(struct net_device *dev);
 static int gfar_change_mtu(struct net_device *dev, int new_mtu);
 static irqreturn_t gfar_error(int irq, void *dev_id);
@@ -554,7 +555,7 @@ static void gfar_ints_enable(struct gfar_private *priv)
 	}
 }
 
-void lock_tx_qs(struct gfar_private *priv)
+static void lock_tx_qs(struct gfar_private *priv)
 {
 	int i;
 
@@ -562,7 +563,7 @@ void lock_tx_qs(struct gfar_private *priv)
 		spin_lock(&priv->tx_queue[i]->txlock);
 }
 
-void unlock_tx_qs(struct gfar_private *priv)
+static void unlock_tx_qs(struct gfar_private *priv)
 {
 	int i;
 
@@ -2671,7 +2672,7 @@ static struct sk_buff *gfar_alloc_skb(struct net_device *dev)
 	return skb;
 }
 
-struct sk_buff *gfar_new_skb(struct net_device *dev, dma_addr_t *bufaddr)
+static struct sk_buff *gfar_new_skb(struct net_device *dev, dma_addr_t *bufaddr)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	struct sk_buff *skb;
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH net 0/6] openvswitch: datapath fixes
From: David Miller @ 2014-12-24  5:58 UTC (permalink / raw)
  To: pshelar; +Cc: netdev
In-Reply-To: <1419380405-1515-1-git-send-email-pshelar@nicira.com>

From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 23 Dec 2014 16:20:05 -0800

> Following patch series is mostly targeted to MPLS fixes. other
> patches are related datapth transmit path error handling. 

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net] net: Reset secmark when scrubbing packet
From: David Miller @ 2014-12-24  5:22 UTC (permalink / raw)
  To: tgraf; +Cc: netdev
In-Reply-To: <efb09aca5173a9a18f15066c33a4998ed70bd34a.1419293504.git.tgraf@suug.ch>

From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 23 Dec 2014 01:13:18 +0100

> skb_scrub_packet() is called when a packet switches between a context
> such as between underlay and overlay, between namespaces, or between
> L3 subnets.
> 
> While we already scrub the packet mark, connection tracking entry,
> and cached destination, the security mark/context is left intact.
> 
> It seems wrong to inherit the security context of a packet when going
> from overlay to underlay or across forwarding paths.
> 
> Signed-off-by: Thomas Graf <tgraf@suug.ch>

Applied and queued up for -stable, thanks Thomas.

^ permalink raw reply

* Re: [PATCH net] net: Generalize ndo_gso_check to ndo_features_check
From: Jesse Gross @ 2014-12-24  5:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Tom Herbert, Joe Stringer, Eric Dumazet
In-Reply-To: <20141223.235238.1958837159787674842.davem@davemloft.net>

On Tue, Dec 23, 2014 at 11:52 PM, David Miller <davem@davemloft.net> wrote:
> From: Jesse Gross <jesse@nicira.com>
> Date: Mon, 22 Dec 2014 08:03:43 -0800
>
>> GSO isn't the only offload feature with restrictions that
>> potentially can't be expressed with the current features mechanism.
>> Checksum is another although it's a general issue that could in
>> theory apply to anything. Even if it may be possible to
>> implement these restrictions in other ways, it can result in
>> duplicate code or inefficient per-packet behavior.
>>
>> This generalizes ndo_gso_check so that drivers can remove any
>> features that don't make sense for a given packet, similar to
>> netif_skb_features(). It also converts existing driver
>> restrictions to the new format, completing the work that was
>> done to support tunnel protocols since the issues apply to
>> checksums as well.
>>
>> CC: Tom Herbert <therbert@google.com>
>> CC: Joe Stringer <joestringer@nicira.com>
>> CC: Eric Dumazet <edumazet@google.com>
>> Signed-off-by: Jesse Gross <jesse@nicira.com>
>> Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
>
> I don't think this fixes the case which was the main impetus for Eric
> Dumazet's patch.
>
> The r8152 USB networking driver supports TSO, but has a length
> restriction, and we weren't software segmenting when netif_needs_gso()
> returns true, exactly because we didn't clear TSO from the feature
> flags.

I believe that this should behave exactly the same as Eric's patch in
this case. The driver would implement the length validation and return
the set of features with ANDed with ~NETIF_F_GSO_MASK. This is
combined with the features computed by netif_skb_features() used for
all future offload decisions, including skb_gso_segment().

^ permalink raw reply

* Re: [PATCH net] net: Fix stacked vlan offload features computation
From: David Miller @ 2014-12-24  5:09 UTC (permalink / raw)
  To: makita.toshiaki; +Cc: jesse, netdev
In-Reply-To: <1419242654-4824-1-git-send-email-makita.toshiaki@lab.ntt.co.jp>

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon, 22 Dec 2014 19:04:14 +0900

> When vlan tags are stacked, it is very likely that the outer tag is stored
> in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto.
> Currently netif_skb_features() first looks at skb->protocol even if there
> is the outer tag in vlan_tci, thus it incorrectly retrieves the protocol
> encapsulated by the inner vlan instead of the inner vlan protocol.
> This allows GSO packets to be passed to HW and they end up being
> corrupted.
> 
> Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.")
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH net] net: Generalize ndo_gso_check to ndo_features_check
From: David Miller @ 2014-12-24  4:52 UTC (permalink / raw)
  To: jesse; +Cc: netdev, therbert, joestringer, edumazet
In-Reply-To: <1419264223-30004-1-git-send-email-jesse@nicira.com>

From: Jesse Gross <jesse@nicira.com>
Date: Mon, 22 Dec 2014 08:03:43 -0800

> GSO isn't the only offload feature with restrictions that
> potentially can't be expressed with the current features mechanism.
> Checksum is another although it's a general issue that could in
> theory apply to anything. Even if it may be possible to
> implement these restrictions in other ways, it can result in
> duplicate code or inefficient per-packet behavior.
> 
> This generalizes ndo_gso_check so that drivers can remove any
> features that don't make sense for a given packet, similar to
> netif_skb_features(). It also converts existing driver
> restrictions to the new format, completing the work that was
> done to support tunnel protocols since the issues apply to
> checksums as well.
> 
> CC: Tom Herbert <therbert@google.com>
> CC: Joe Stringer <joestringer@nicira.com>
> CC: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Jesse Gross <jesse@nicira.com>
> Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")

I don't think this fixes the case which was the main impetus for Eric
Dumazet's patch.

The r8152 USB networking driver supports TSO, but has a length
restriction, and we weren't software segmenting when netif_needs_gso()
returns true, exactly because we didn't clear TSO from the feature
flags.

We really need to sort this out.

^ permalink raw reply

* Re: [PATCH 4/4] net: Rearrange loop in net_rx_action
From: David Miller @ 2014-12-24  4:20 UTC (permalink / raw)
  To: herbert
  Cc: eric.dumazet, david.vrabel, netdev, xen-devel, konrad.wilk,
	boris.ostrovsky, edumazet
In-Reply-To: <E1Y2QRt-0001qq-Lp@gondolin.me.apana.org.au>

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:25 +1100

> This patch rearranges the loop in net_rx_action to reduce the
> amount of jumping back and forth when reading the code.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Applied.

^ permalink raw reply

* Re: [PATCH 3/4] net: Always poll at least one device in net_rx_action
From: David Miller @ 2014-12-24  4:20 UTC (permalink / raw)
  To: herbert
  Cc: eric.dumazet, david.vrabel, netdev, xen-devel, konrad.wilk,
	boris.ostrovsky, edumazet
In-Reply-To: <E1Y2QRs-0001q5-6u@gondolin.me.apana.org.au>

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:24 +1100

> We should only perform the softnet_break check after we have polled
> at least one device in net_rx_action.  Otherwise a zero or negative
> setting of netdev_budget can lock up the whole system.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Applied.

^ permalink raw reply

* Re: [PATCH 2/4] net: Detect drivers that reschedule NAPI and exhaust budget
From: David Miller @ 2014-12-24  4:20 UTC (permalink / raw)
  To: herbert
  Cc: eric.dumazet, david.vrabel, netdev, xen-devel, konrad.wilk,
	boris.ostrovsky, edumazet
In-Reply-To: <E1Y2QRq-0001pW-Tn@gondolin.me.apana.org.au>

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:22 +1100

> The commit d75b1ade567ffab085e8adbbdacf0092d10cd09c (net: less
> interrupt masking in NAPI) required drivers to leave poll_list
> empty if the entire budget is consumed.
> 
> We have already had two broken drivers so let's add a check for
> this.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Applied.

^ permalink raw reply

* Re: [PATCH 1/4] net: Move napi polling code out of net_rx_action
From: David Miller @ 2014-12-24  4:20 UTC (permalink / raw)
  To: herbert
  Cc: eric.dumazet, david.vrabel, netdev, xen-devel, konrad.wilk,
	boris.ostrovsky, edumazet
In-Reply-To: <E1Y2QRp-0001pD-J7@gondolin.me.apana.org.au>

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:21 +1100

> This patch creates a new function napi_poll and moves the napi
> polling code from net_rx_action into it.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Applied.

^ permalink raw reply

* Re: [PATCH v3] 3c59x: Fix memory leaks in vortex_open
From: David Miller @ 2014-12-24  4:10 UTC (permalink / raw)
  To: nhorman
  Cc: baijiaju1990, ebiederm, dingtianhong, paul.gortmaker,
	justinvanwijngaarden, netdev
In-Reply-To: <20141224032728.GA20392@localhost.localdomain>

From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 23 Dec 2014 22:27:28 -0500

> Sooo, fix it.  Add some checks to not delete the timer if its not been
> initalized.  Its really preferable to have a single teardown path and a single
> bringup path if at all possible

Or simply have vortex_up() initialize the two timers before it does
anything else.

^ permalink raw reply

* Re: [PATCH v3] 3c59x: Fix memory leaks in vortex_open
From: Neil Horman @ 2014-12-24  3:27 UTC (permalink / raw)
  To: Jia-Ju Bai
  Cc: davem, ebiederm, dingtianhong, paul.gortmaker,
	justinvanwijngaarden, netdev
In-Reply-To: <549A212A.60001@163.com>

On Wed, Dec 24, 2014 at 10:12:58AM +0800, Jia-Ju Bai wrote:
> On 12/23/2014 11:43 PM, Neil Horman wrote:
> >No, I don't think so.  vortex_close predicates each free with a NULL check, so
> >if its not been allocated, it shouldn't be freed.  vortex_close also puts the
> >adapter back into a known state (undoing all the setup that vortex_open does).
> >I really think its better to go with the proper close path than just unwinding
> >the allocation
> >
> >Neil
> >
> 
> Firstly, I run my match on the real hardware(3com 3c905B 100Base
> PCI Ethernet Controller) and make vortex_up failed on purpose
> (make "pci_enable_device" in vortex_up failed). During runtime, the driver
> works well and memory leaks are fixed.
> 
> Secondly, I revise the code according to your opinion:
> 
>         retval = vortex_up(dev);
>         if (!retval)
>             goto out;
> 
> +      vortex_close(dev);
> +      return -ENOMEM;
> 
> Then I repeat my experiment, but system hang occurs!
> 
> After adding some "printk"s into the code and running the driver, I find
> the problem's source:
> vortex_close calls vortex_down in runtime, and vortex_down calls
> "del_timer_sync(&vp->rx_oom_timer);" in the code. However, I make
> "pci_enable_device" failed in vortext_up to let vortex_up return an
> error code directly, but "vp->rx_oom_timer" is initialized only by
> "init_timer" after "pci_enable_device". Thus when
> "del_timer_sync(&vp->rx_oom_timer);" is called in vortex_down,
> a null dereference may occur.
> Moreover, only "pci_enable_device" can make vortex_up failed.
> 
> 
Sooo, fix it.  Add some checks to not delete the timer if its not been
initalized.  Its really preferable to have a single teardown path and a single
bringup path if at all possible
Neil

^ permalink raw reply

* Re: [PATCH net 5/6] openvswitch: Fix vport_send double free
From: Lino Sanfilippo @ 2014-12-24  3:16 UTC (permalink / raw)
  To: Pravin B Shelar, davem; +Cc: netdev
In-Reply-To: <1419380432-1665-1-git-send-email-pshelar@nicira.com>

Hi,

On 24.12.2014 01:20, Pravin B Shelar wrote:
l_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
>  
> @@ -183,8 +185,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
>  
>  	/* Push Tunnel header. */
>  	skb = __build_header(skb, tunnel_hlen);
> -	if (unlikely(!skb)) {
> -		err = 0;
> +	if (IS_ERR(skb)) {
> +		err = PTR_ERR(rt);

Shouldn't be it

err = PTR_ERR(skb); ?

Regards,
Lino

^ permalink raw reply

* [PATCH net-next V2] virtio-net: don't do header check for dodgy gso packets
From: Jason Wang @ 2014-12-24  3:03 UTC (permalink / raw)
  To: rusty, mst, virtualization, netdev, linux-kernel

There's no need to do header check for virtio-net since:

- Host sets dodgy for all gso packets from guest and check the header.
- Host should be prepared for all kinds of evil packets from guest, since
  malicious guest can send any kinds of packet.

So this patch sets NETIF_F_GSO_ROBUST for virtio-net to skip the check.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from V1:
- typo fixes
---
 drivers/net/virtio_net.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b8bd719..45c6ce2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1761,6 +1761,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
 			dev->hw_features |= NETIF_F_TSO_ECN;
 
+		dev->features |= NETIF_F_GSO_ROBUST;
+
 		if (gso)
 			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
 		/* (!csum && gso) case will be fixed by register_netdev() */
-- 
1.9.1

^ permalink raw reply related

* RE: [PATCH 2/3] net/fsl: remove irq assignment from xgmac_mdio
From: Shaohui Xie @ 2014-12-24  2:22 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: shh.xie@gmail.com, netdev@vger.kernel.org, davem@davemloft.net
In-Reply-To: <54997817.6010108@cogentembedded.com>

> -----Original Message-----
> From: Sergei Shtylyov [mailto:sergei.shtylyov@cogentembedded.com]
> Sent: Tuesday, December 23, 2014 10:12 PM
> To: shh.xie@gmail.com; netdev@vger.kernel.org; davem@davemloft.net
> Cc: Xie Shaohui-B21989
> Subject: Re: [PATCH 2/3] net/fsl: remove irq assignment from xgmac_mdio
> 
> Hello.
> 
> On 12/23/2014 12:46 PM, shh.xie@gmail.com wrote:
> 
> > From: Shaohui Xie <Shaohui.Xie@freescale.com>
> 
> > Which is wrong and not used, so no extra space needed by
> > mdio_alloc_size(), change the parameter accordingly.
> 
> > Signed-off-by: Shaohui Xie <Shaohui.Xie@freescale.com>
> > ---
> >   drivers/net/ethernet/freescale/xgmac_mdio.c | 3 +--
> >   1 file changed, 1 insertion(+), 2 deletions(-)
> 
> > diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c
> > b/drivers/net/ethernet/freescale/xgmac_mdio.c
> > index 90adba1..72e0b85 100644
> > --- a/drivers/net/ethernet/freescale/xgmac_mdio.c
> > +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
> > @@ -187,14 +187,13 @@ static int xgmac_mdio_probe(struct
> platform_device *pdev)
> >   		return ret;
> >   	}
> >
> > -	bus = mdiobus_alloc_size(PHY_MAX_ADDR * sizeof(int));
> > +	bus = mdiobus_alloc_size(0);
> 
>     It's now equivalent to a mere mdiobus_alloc().
[S.H] Yes, mdiobus_alloc() defined as:

static inline struct mii_bus *mdiobus_alloc(void)
{       
        return mdiobus_alloc_size(0);
} 

Should I use mdiobus_alloc() instead?

Thanks!
Shaohui

^ permalink raw reply

* Re: [PATCH v3] 3c59x: Fix memory leaks in vortex_open
From: Jia-Ju Bai @ 2014-12-24  2:12 UTC (permalink / raw)
  To: Neil Horman
  Cc: davem, ebiederm, dingtianhong, paul.gortmaker,
	justinvanwijngaarden, netdev
In-Reply-To: <20141223154313.GE31876@hmsreliant.think-freely.org>

On 12/23/2014 11:43 PM, Neil Horman wrote:
> No, I don't think so.  vortex_close predicates each free with a NULL check, so
> if its not been allocated, it shouldn't be freed.  vortex_close also puts the
> adapter back into a known state (undoing all the setup that vortex_open does).
> I really think its better to go with the proper close path than just unwinding
> the allocation
>
> Neil
>

Firstly, I run my match on the real hardware(3com 3c905B 100Base
PCI Ethernet Controller) and make vortex_up failed on purpose
(make "pci_enable_device" in vortex_up failed). During runtime, the driver
works well and memory leaks are fixed.

Secondly, I revise the code according to your opinion:

         retval = vortex_up(dev);
         if (!retval)
             goto out;

+      vortex_close(dev);
+      return -ENOMEM;

Then I repeat my experiment, but system hang occurs!

After adding some "printk"s into the code and running the driver, I find
the problem's source:
vortex_close calls vortex_down in runtime, and vortex_down calls
"del_timer_sync(&vp->rx_oom_timer);" in the code. However, I make
"pci_enable_device" failed in vortext_up to let vortex_up return an
error code directly, but "vp->rx_oom_timer" is initialized only by
"init_timer" after "pci_enable_device". Thus when
"del_timer_sync(&vp->rx_oom_timer);" is called in vortex_down,
a null dereference may occur.
Moreover, only "pci_enable_device" can make vortex_up failed.

^ permalink raw reply

* [PATCH iproute2 v3] tc: Show classes in tree view
From: Vadim Kochan @ 2014-12-24  0:46 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

Added new '-t[ree]' which shows classes dependency
in the tree view. Meanwhile only generic stats info
is supported.

e.g.:

$ tc/tc -t class show dev tap0
+---(1:2) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    +---(1:40) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
|    +---(1:50) htb rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    |    +---(1:51) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|    |
|    +---(1:60) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|
+---(1:1) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
     +---(1:10) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
     +---(1:20) htb prio 0 rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
     +---(1:30) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b

$ tc/tc -t -s class show dev tap0
+---(1:2) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    |    Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |    rate 0bit 0pps backlog 0b 0p requeues 0
|    |
|    +---(1:40) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
|    |          Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |          rate 0bit 0pps backlog 0b 0p requeues 0
|    |
|    +---(1:50) htb rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    |    |     Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |    |     rate 0bit 0pps backlog 0b 0p requeues 0
|    |    |
|    |    +---(1:51) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|    |               Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |               rate 0bit 0pps backlog 0b 0p requeues 0
|    |
|    +---(1:60) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|               Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|               rate 0bit 0pps backlog 0b 0p requeues 0
|
+---(1:1) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
     |    Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
     |    rate 0bit 0pps backlog 0b 0p requeues 0
     |
     +---(1:10) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
     |          Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
     |          rate 0bit 0pps backlog 0b 0p requeues 0
     |
     +---(1:20) htb prio 0 rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
     |          Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
     |          rate 0bit 0pps backlog 0b 0p requeues 0
     |
     +---(1:30) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
                Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
                rate 0bit 0pps backlog 0b 0p requeues 0

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
Changes v3:
    Fixed wrong brackets style

Changes v2:
    Removed "Date:" from commit message which was added by mistake.

Changes RFC -> PATCH:
    #1 get rid of INIT_HLIST_NODE
    #2 added sample output to commit message
    #3 use "show_tree=1" instead of "show_tree++"
    #4 no need update include/hlist.h (because of #1)
    #5 changed a little tree output: parentheses around class id instead of qdisc name

 tc/tc.c        |   5 +-
 tc/tc_class.c  | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 tc/tc_common.h |   2 +
 3 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/tc/tc.c b/tc/tc.c
index 9b50e74..30950a6 100644
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -34,8 +34,9 @@ int show_stats = 0;
 int show_details = 0;
 int show_raw = 0;
 int show_pretty = 0;
-int batch_mode = 0;
+int show_tree = 0;
 
+int batch_mode = 0;
 int resolve_hosts = 0;
 int use_iec = 0;
 int force = 0;
@@ -278,6 +279,8 @@ int main(int argc, char **argv)
 			++show_raw;
 		} else if (matches(argv[1], "-pretty") == 0) {
 			++show_pretty;
+		} else if (matches(argv[1], "-tree") == 0) {
+			show_tree = 1;
 		} else if (matches(argv[1], "-Version") == 0) {
 			printf("tc utility, iproute2-ss%s\n", SNAPSHOT);
 			return 0;
diff --git a/tc/tc_class.c b/tc/tc_class.c
index e56bf07..f155294 100644
--- a/tc/tc_class.c
+++ b/tc/tc_class.c
@@ -24,6 +24,22 @@
 #include "utils.h"
 #include "tc_util.h"
 #include "tc_common.h"
+#include "hlist.h"
+
+struct cls_node {
+	struct hlist_node hlist;
+	__u32 handle;
+	__u32 parent;
+	int level;
+	struct cls_node *cls_parent;
+	struct cls_node *cls_right;
+	struct rtattr *attr;
+	int attr_len;
+	int childs_count;
+};
+
+static struct hlist_head cls_list = {};
+static struct hlist_head root_cls_list = {};
 
 static void usage(void);
 
@@ -148,13 +164,145 @@ int filter_ifindex;
 __u32 filter_qdisc;
 __u32 filter_classid;
 
+static void tree_cls_add(__u32 parent, __u32 handle, struct rtattr *attr, int len)
+{
+	struct cls_node *cls = malloc(sizeof(struct cls_node));
+
+	memset(cls, 0, sizeof(*cls));
+	cls->handle    = handle;
+	cls->parent    = parent;
+	cls->attr      = malloc(len);
+	cls->attr_len  = len;
+
+	memcpy(cls->attr, attr, len);
+
+	if (parent == TC_H_ROOT)
+		hlist_add_head(&cls->hlist, &root_cls_list);
+	else
+		hlist_add_head(&cls->hlist, &cls_list);
+}
+
+static void tree_cls_indent(char *buf, struct cls_node *cls, int is_newline,
+		int add_spaces)
+{
+	char spaces[100] = {0};
+
+	while (cls && cls->cls_parent) {
+		cls->cls_parent->cls_right = cls;
+		cls = cls->cls_parent;
+	}
+	while (cls && cls->cls_right) {
+		if (cls->hlist.next)
+			strcat(buf, "|    ");
+		else
+			strcat(buf, "     ");
+
+		cls = cls->cls_right;
+	}
+
+	if (is_newline) {
+		if (cls->hlist.next && cls->childs_count)
+			strcat(buf, "|    |");
+		else if (cls->hlist.next)
+			strcat(buf, "|     ");
+		else if (cls->childs_count)
+			strcat(buf, "     |");
+		else if (!cls->hlist.next)
+			strcat(buf, "      ");
+	}
+	if (add_spaces > 0) {
+		sprintf(spaces, "%-*s", add_spaces, "");
+		strcat(buf, spaces);
+	}
+}
+
+static void tree_cls_show(FILE *fp, char *buf, struct hlist_head *root_list, int level)
+{
+	struct hlist_node *n, *tmp_cls;
+	char cls_id_str[256] = {};
+	struct rtattr * tb[TCA_MAX+1] = {};
+	struct qdisc_util *q;
+	char str[100] = {};
+
+	hlist_for_each_safe(n, tmp_cls, root_list) {
+		struct hlist_node *c, *tmp_chld;
+		struct hlist_head childs = {};
+		struct cls_node *cls = container_of(n, struct cls_node, hlist);
+
+		hlist_for_each_safe(c, tmp_chld, &cls_list) {
+			struct cls_node *child = container_of(c, struct cls_node, hlist);
+
+			if (cls->handle == child->parent) {
+				hlist_del(c);
+				hlist_add_head(c, &childs);
+				cls->childs_count++;
+				child->cls_parent = cls;
+			}
+		}
+
+		tree_cls_indent(buf, cls, 0, 0);
+
+		print_tc_classid(cls_id_str, sizeof(cls_id_str), cls->handle);
+		sprintf(str, "+---(%s)", cls_id_str);
+		strcat(buf, str);
+
+		parse_rtattr(tb, TCA_MAX, cls->attr, cls->attr_len);
+
+		if (tb[TCA_KIND] == NULL) {
+			strcat(buf, " [unknown qdisc kind] ");
+		} else {
+			const char *kind = rta_getattr_str(tb[TCA_KIND]);
+
+			sprintf(str, " %s ", kind);
+			strcat(buf, str);
+			fprintf(fp, "%s", buf);
+			buf[0] = '\0';
+
+			q = get_qdisc_kind(kind);
+			if (q && q->print_copt) {
+				q->print_copt(q, fp, tb[TCA_OPTIONS]);
+			}
+			if (q && show_stats) {
+				int cls_indent = strlen(q->id) - 2 +
+					strlen(cls_id_str);
+				struct rtattr *xstats = NULL;
+
+				tree_cls_indent(buf, cls, 1, cls_indent);
+
+				if (tb[TCA_STATS] || tb[TCA_STATS2]) {
+					fprintf(fp, "\n");
+					print_tcstats_attr(fp, tb, buf, &xstats);
+					buf[0] = '\0';
+				}
+				if (cls->hlist.next || cls->childs_count) {
+					strcat(buf, "\n");
+					tree_cls_indent(buf, cls, 1, 0);
+				}
+			}
+		}
+		free(cls->attr);
+		fprintf(fp, "%s\n", buf);
+		buf[0] = '\0';
+
+		tree_cls_show(fp, buf, &childs, level + 1);
+		if (!cls->hlist.next) {
+			tree_cls_indent(buf, cls, 0, 0);
+			strcat(buf, "\n");
+		}
+
+		fprintf(fp, "%s", buf);
+		buf[0] = '\0';
+		free(cls);
+	}
+}
+
 int print_class(const struct sockaddr_nl *who,
 		       struct nlmsghdr *n, void *arg)
 {
 	FILE *fp = (FILE*)arg;
 	struct tcmsg *t = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
-	struct rtattr * tb[TCA_MAX+1];
+	struct rtattr * tb[TCA_MAX+1] = {};
 	struct qdisc_util *q;
 	char abuf[256];
 
@@ -167,13 +315,18 @@ int print_class(const struct sockaddr_nl *who,
 		fprintf(stderr, "Wrong len %d\n", len);
 		return -1;
 	}
+
+	if (show_tree) {
+		tree_cls_add(t->tcm_parent, t->tcm_handle, TCA_RTA(t), len);
+		return 0;
+	}
+
 	if (filter_qdisc && TC_H_MAJ(t->tcm_handle^filter_qdisc))
 		return 0;
 
 	if (filter_classid && t->tcm_handle != filter_classid)
 		return 0;
 
-	memset(tb, 0, sizeof(tb));
 	parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
 
 	if (tb[TCA_KIND] == NULL) {
@@ -236,6 +389,7 @@ static int tc_class_list(int argc, char **argv)
 {
 	struct tcmsg t;
 	char d[16];
+	char buf[1024] = {0};
 
 	memset(&t, 0, sizeof(t));
 	t.tcm_family = AF_UNSPEC;
@@ -306,6 +460,9 @@ static int tc_class_list(int argc, char **argv)
 		return 1;
 	}
 
+	if (show_tree)
+		tree_cls_show(stdout, &buf[0], &root_cls_list, 0);
+
 	return 0;
 }
 
diff --git a/tc/tc_common.h b/tc/tc_common.h
index 4f88856..0ee009b 100644
--- a/tc/tc_common.h
+++ b/tc/tc_common.h
@@ -19,3 +19,5 @@ extern int parse_estimator(int *p_argc, char ***p_argv, struct tc_estimator *est
 struct tc_sizespec;
 extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s);
 extern int check_size_table_opts(struct tc_sizespec *s);
+
+extern int show_tree;
-- 
2.1.3

^ permalink raw reply related

* [PATCH net 6/6] vxlan: Fix double free of skb.
From: Pravin B Shelar @ 2014-12-24  0:20 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar

In case of error vxlan_xmit_one() can free already freed skb.
Also fixes memory leak of dst-entry.

Fixes: acbf74a7630 ("vxlan: Refactor vxlan driver to make use
of the common UDP tunnel functions").

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 drivers/net/vxlan.c |   34 ++++++++++++++++++++++++----------
 1 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 49d9f22..7fbd89f 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1579,8 +1579,10 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk);
 
 	skb = udp_tunnel_handle_offloads(skb, udp_sum);
-	if (IS_ERR(skb))
-		return -EINVAL;
+	if (IS_ERR(skb)) {
+		err = -EINVAL;
+		goto err;
+	}
 
 	skb_scrub_packet(skb, xnet);
 
@@ -1590,12 +1592,16 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err))
-		return err;
+	if (unlikely(err)) {
+		kfree_skb(skb);
+		goto err;
+	}
 
 	skb = vlan_hwaccel_push_inside(skb);
-	if (WARN_ON(!skb))
-		return -ENOMEM;
+	if (WARN_ON(!skb)) {
+		err = -ENOMEM;
+		goto err;
+	}
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
@@ -1606,6 +1612,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
 			     ttl, src_port, dst_port);
 	return 0;
+err:
+	dst_release(dst);
+	return err;
 }
 #endif
 
@@ -1621,7 +1630,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 
 	skb = udp_tunnel_handle_offloads(skb, udp_sum);
 	if (IS_ERR(skb))
-		return -EINVAL;
+		return PTR_ERR(skb);
 
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ VXLAN_HLEN + sizeof(struct iphdr)
@@ -1629,8 +1638,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	skb = vlan_hwaccel_push_inside(skb);
 	if (WARN_ON(!skb))
@@ -1776,9 +1787,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				     tos, ttl, df, src_port, dst_port,
 				     htonl(vni << 8),
 				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
-
-		if (err < 0)
+		if (err < 0) {
+			/* skb is already freed. */
+			skb = NULL;
 			goto rt_tx_error;
+		}
+
 		iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-- 
1.7.1

^ permalink raw reply related

* [PATCH net 5/6] openvswitch: Fix vport_send double free
From: Pravin B Shelar @ 2014-12-24  0:20 UTC (permalink / raw)
  To: davem; +Cc: netdev, Pravin B Shelar

Today vport-send has complex error handling because it involves
freeing skb and updating stats depending on return value from
vport send implementation.
This can be simplified by delegating responsibility of freeing
skb to the vport implementation for all cases. So that
vport-send needs just update stats.

Fixes: 91b7514cdf ("openvswitch: Unify vport error stats
handling")
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 net/ipv4/geneve.c              |    6 +++++-
 net/openvswitch/vport-geneve.c |    3 +++
 net/openvswitch/vport-gre.c    |   18 +++++++++++-------
 net/openvswitch/vport-vxlan.c  |    2 ++
 net/openvswitch/vport.c        |    5 ++---
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 95e47c9..394a200 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -122,14 +122,18 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
 	int err;
 
 	skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
 
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
 			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	skb = vlan_hwaccel_push_inside(skb);
 	if (unlikely(!skb))
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 347fa23..484864d 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -219,7 +219,10 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 			      false);
 	if (err < 0)
 		ip_rt_put(rt);
+	return err;
+
 error:
+	kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 6b69df5..28f54e9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -73,7 +73,7 @@ static struct sk_buff *__build_header(struct sk_buff *skb,
 
 	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
 	if (IS_ERR(skb))
-		return NULL;
+		return skb;
 
 	tpi.flags = filter_tnl_flags(tun_key->tun_flags);
 	tpi.proto = htons(ETH_P_TEB);
@@ -144,7 +144,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 
 	if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
 		err = -EINVAL;
-		goto error;
+		goto err_free_skb;
 	}
 
 	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
@@ -157,8 +157,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	fl.flowi4_proto = IPPROTO_GRE;
 
 	rt = ip_route_output_key(net, &fl);
-	if (IS_ERR(rt))
-		return PTR_ERR(rt);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto err_free_skb;
+	}
 
 	tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
 
@@ -183,8 +185,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 
 	/* Push Tunnel header. */
 	skb = __build_header(skb, tunnel_hlen);
-	if (unlikely(!skb)) {
-		err = 0;
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(rt);
+		skb = NULL;
 		goto err_free_rt;
 	}
 
@@ -198,7 +201,8 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 			     tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
 err_free_rt:
 	ip_rt_put(rt);
-error:
+err_free_skb:
+	kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 38f95a5..d7c46b3 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -187,7 +187,9 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 			     false);
 	if (err < 0)
 		ip_rt_put(rt);
+	return err;
 error:
+	kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 9584526..53f3ebb 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -519,10 +519,9 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
 		u64_stats_update_end(&stats->syncp);
 	} else if (sent < 0) {
 		ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
-		kfree_skb(skb);
-	} else
+	} else {
 		ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
-
+	}
 	return sent;
 }
 
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox