Netdev List

Netdev List
 help / color / mirror / Atom feed

* [RFC PATCH 1/2] vlan: export functions to register/unregister vlan devices
From: James Chapman @ 2013-09-24 16:27 UTC (permalink / raw)
  To: netdev; +Cc: James Chapman
In-Reply-To: <1380040030-6648-1-git-send-email-jchapman@katalix.com>

Export two new symbols to let external code register and unregister
vlan devices. Allow the caller to specify the name template for the
new netdev instance.

The new symbols are:
vlan_register_device
unregister_vlan_dev
---
 include/linux/if_vlan.h |   11 ++++++
 net/8021q/vlan.c        |   82 ++++++++++++++++++++++++++++------------------
 2 files changed, 61 insertions(+), 32 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 715c343..c6431d5 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -101,6 +101,8 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
 				 const struct net_device *by_dev);
 
 extern bool vlan_uses_dev(const struct net_device *dev);
+extern int vlan_register_device(struct net_device *real_dev, u16 vlan_id, struct net_device **dev, char *name);
+extern void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev,
@@ -155,6 +157,15 @@ static inline bool vlan_uses_dev(const struct net_device *dev)
 {
 	return false;
 }
+
+static inline int vlan_register_device(struct net_device *real_dev, u16 vlan_id, struct net_device **dev, char *name)
+{
+	return -EPROTONOSUPPORT;
+}
+
+static inline void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
+{
+}
 #endif
 
 static inline bool vlan_hw_offload_capable(netdev_features_t features,
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 61fc573..272157c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -121,6 +121,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
+EXPORT_SYMBOL(unregister_vlan_dev);
 
 int vlan_check_real_dev(struct net_device *real_dev,
 			__be16 protocol, u16 vlan_id)
@@ -204,18 +205,16 @@ out_vid_del:
 	return err;
 }
 
-/*  Attach a VLAN device to a mac address (ie Ethernet Card).
- *  Returns 0 if the device was created or a negative error code otherwise.
- */
-static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
+int vlan_register_device(struct net_device *real_dev, u16 vlan_id, struct net_device **dev, char *name)
 {
 	struct net_device *new_dev;
 	struct vlan_dev_priv *vlan;
 	struct net *net = dev_net(real_dev);
-	struct vlan_net *vn = net_generic(net, vlan_net_id);
-	char name[IFNAMSIZ];
 	int err;
 
+	if (!dev || !name)
+		return -EINVAL;
+
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
@@ -223,32 +222,6 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (err < 0)
 		return err;
 
-	/* Gotta set up the fields for the device. */
-	switch (vn->name_type) {
-	case VLAN_NAME_TYPE_RAW_PLUS_VID:
-		/* name will look like:	 eth1.0005 */
-		snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id);
-		break;
-	case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
-		/* Put our vlan.VID in the name.
-		 * Name will look like:	 vlan5
-		 */
-		snprintf(name, IFNAMSIZ, "vlan%i", vlan_id);
-		break;
-	case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
-		/* Put our vlan.VID in the name.
-		 * Name will look like:	 eth0.5
-		 */
-		snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id);
-		break;
-	case VLAN_NAME_TYPE_PLUS_VID:
-		/* Put our vlan.VID in the name.
-		 * Name will look like:	 vlan0005
-		 */
-	default:
-		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
-	}
-
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup);
 
 	if (new_dev == NULL)
@@ -273,12 +246,57 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (err < 0)
 		goto out_free_newdev;
 
+	*dev = new_dev;
+
 	return 0;
 
 out_free_newdev:
 	free_netdev(new_dev);
 	return err;
 }
+EXPORT_SYMBOL(vlan_register_device);
+
+/*  Attach a VLAN device to a mac address (ie Ethernet Card).
+ *  Returns 0 if the device was created or a negative error code otherwise.
+ */
+static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
+{
+	struct net_device *new_dev;
+	struct net *net = dev_net(real_dev);
+	struct vlan_net *vn = net_generic(net, vlan_net_id);
+	char name[IFNAMSIZ];
+	int err;
+
+	/* Gotta set up the fields for the device. */
+	switch (vn->name_type) {
+	case VLAN_NAME_TYPE_RAW_PLUS_VID:
+		/* name will look like:	 eth1.0005 */
+		snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id);
+		break;
+	case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
+		/* Put our vlan.VID in the name.
+		 * Name will look like:	 vlan5
+		 */
+		snprintf(name, IFNAMSIZ, "vlan%i", vlan_id);
+		break;
+	case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
+		/* Put our vlan.VID in the name.
+		 * Name will look like:	 eth0.5
+		 */
+		snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id);
+		break;
+	case VLAN_NAME_TYPE_PLUS_VID:
+		/* Put our vlan.VID in the name.
+		 * Name will look like:	 vlan0005
+		 */
+	default:
+		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
+	}
+
+	err = vlan_register_device(real_dev, vlan_id, &new_dev, name);
+
+	return err;
+}
 
 static void vlan_sync_address(struct net_device *dev,
 			      struct net_device *vlandev)
-- 
1.7.0.4

^ permalink raw reply related

* [RFC PATCH 2/2] l2tp: add vlan pseudowire support
From: James Chapman @ 2013-09-24 16:27 UTC (permalink / raw)
  To: netdev; +Cc: James Chapman
In-Reply-To: <1380040030-6648-1-git-send-email-jchapman@katalix.com>

Register the l2tp_eth driver for netlink ops using the vlan pseudowire
type. Add code to create/destroy a VLAN netdevice when the pseudowire
type is VLAN. This requires new exports in the vlan code.

This results in two netdevices per vlan pseudowire:
1. a master, which should never be used
2. a vlan device, which is enslaved to the master device

The session's ifname value is set to the VLAN netdevice name such
that it is the name returned in session_get netlink requests. For vlan
pseudowires, this should always be the interface that userspace
configures.
---
 net/l2tp/l2tp_eth.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 76125c5..aae38d9 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -17,6 +17,7 @@
 #include <linux/hash.h>
 #include <linux/l2tp.h>
 #include <linux/in.h>
+#include <linux/if_vlan.h>
 #include <linux/etherdevice.h>
 #include <linux/spinlock.h>
 #include <net/sock.h>
@@ -53,6 +54,7 @@ struct l2tp_eth {
 /* via l2tp_session_priv() */
 struct l2tp_eth_sess {
 	struct net_device	*dev;
+	struct net_device	*vlan_dev;
 };
 
 /* per-net private data for this module */
@@ -178,6 +180,50 @@ error:
 	kfree_skb(skb);
 }
 
+static int l2tp_vlan_create(struct l2tp_session *session, u16 vlan_id)
+{
+	struct l2tp_eth_sess *spriv;
+	struct net_device *dev;
+	struct net_device *vlan_dev;
+	char name[IFNAMSIZ + 1];
+	int rc;
+
+	spriv = l2tp_session_priv(session);
+	dev = spriv->dev;
+
+	if (!vlan_id)
+		return -EINVAL;
+
+	snprintf(name, IFNAMSIZ, "%s.%i", dev->name, vlan_id);
+	rtnl_lock();
+	rc = vlan_register_device(dev, vlan_id, &vlan_dev, name);
+	rtnl_unlock();
+	if (rc < 0)
+		return rc;
+
+	spriv->vlan_dev = vlan_dev;
+	strlcpy(session->ifname, vlan_dev->name, IFNAMSIZ);
+
+	return 0;
+}
+
+static int l2tp_vlan_delete(struct l2tp_session *session)
+{
+	struct l2tp_eth_sess *spriv;
+	struct net_device *dev;
+
+	spriv = l2tp_session_priv(session);
+	dev = spriv->dev;
+
+	rtnl_lock();
+	unregister_vlan_dev(spriv->vlan_dev, NULL);
+	rtnl_unlock();
+	strlcpy(session->ifname, dev->name, IFNAMSIZ);
+	spriv->vlan_dev = NULL;
+
+	return 0;
+}
+
 static void l2tp_eth_delete(struct l2tp_session *session)
 {
 	struct l2tp_eth_sess *spriv;
@@ -186,6 +232,8 @@ static void l2tp_eth_delete(struct l2tp_session *session)
 	if (session) {
 		spriv = l2tp_session_priv(session);
 		dev = spriv->dev;
+		if (spriv->vlan_dev)
+			l2tp_vlan_delete(session);
 		if (dev) {
 			unregister_netdev(dev);
 			spriv->dev = NULL;
@@ -277,10 +325,18 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 	if (rc < 0)
 		goto out_del_dev;
 
-	__module_get(THIS_MODULE);
 	/* Must be done after register_netdev() */
 	strlcpy(session->ifname, dev->name, IFNAMSIZ);
 
+	if (cfg->pw_type == L2TP_PWTYPE_ETH_VLAN) {
+		/* Create VLAN interface. Replaces session->ifname */
+		rc = l2tp_vlan_create(session, cfg->vlan_id);
+		if (rc < 0)
+			goto out_unreg;
+	}
+
+	__module_get(THIS_MODULE);
+
 	dev_hold(dev);
 	pn = l2tp_eth_pernet(dev_net(dev));
 	spin_lock(&pn->l2tp_eth_lock);
@@ -289,6 +345,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 
 	return 0;
 
+out_unreg:
+	unregister_netdev(dev);
 out_del_dev:
 	free_netdev(dev);
 	spriv->dev = NULL;
@@ -328,6 +386,11 @@ static int __init l2tp_eth_init(void)
 	err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
 	if (err)
 		goto out;
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH_VLAN, &l2tp_eth_nl_cmd_ops);
+	if (err)
+		goto out;
+#endif
 
 	err = register_pernet_device(&l2tp_eth_net_ops);
 	if (err)
@@ -338,6 +401,9 @@ static int __init l2tp_eth_init(void)
 	return 0;
 
 out_unreg:
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH_VLAN);
+#endif
 	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
 out:
 	return err;
@@ -346,6 +412,9 @@ out:
 static void __exit l2tp_eth_exit(void)
 {
 	unregister_pernet_device(&l2tp_eth_net_ops);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH_VLAN);
+#endif
 	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
 }
 
@@ -355,4 +424,4 @@ module_exit(l2tp_eth_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
-MODULE_VERSION("1.0");
+MODULE_VERSION("1.1");
-- 
1.7.0.4

^ permalink raw reply related

* [RFC PATCH 0/2] l2tp: add vlan pseudowire support
From: James Chapman @ 2013-09-24 16:27 UTC (permalink / raw)
  To: netdev; +Cc: James Chapman

This RFC patch series adds L2TP vlan pseudowire support by creating a
vlan netdev in addition to the l2tpeth netdev when the user creates
vlan pseudowires. To do so, new symbols are exported in 8021q to allow
l2tp to create and destroy vlan netdevs. The existing L2TP netlink API
already contains attributes for vlan pseudowires but they have been
unused to date.

L2TP vlan pseudowires are similar to the existing L2TP ethernet
pseudowires except that vlan pseudowires always carry only one vlan
and never carry untagged frames, while ethernet pseudowires can carry
any ethernet frame (tagged or untagged). It is possible to create vlan
interfaces on top of l2tpeth interfaces (ethernet pseudowires) today
using standard tools. But for VLAN pseudowires, there must be one and
only one vlan.

The changes in this patch series create a pair of netdevs per vlan
pseudowire: a vlan netdev and an l2tpeth netdev; the l2tpeth netdev is
used as the master for the vlan netdev. The master netdev must not be
configured since untagged frames are not allowed in vlan pseudowires.

Some L2TP setups have thousands of L2TP pseudowires so creating two
netdevs per vlan pseudowire isn't ideal when one might suffice. Should I
investigate changing 8021q to allow for the case where there is no
master netdev?

James Chapman (2):
  vlan: export functions to register/unregister vlan devices
  l2tp: add vlan pseudowire support

 include/linux/if_vlan.h |   11 ++++++
 net/8021q/vlan.c        |   82 ++++++++++++++++++++++++++++------------------
 net/l2tp/l2tp_eth.c     |   73 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 132 insertions(+), 34 deletions(-)

^ permalink raw reply

* Re: [PATCH net v2] vxlan: Use RCU apis to access sk_user_data.
From: Pravin Shelar @ 2013-09-24 16:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Jesse Gross
In-Reply-To: <1380039366.3165.92.camel@edumazet-glaptop>

On Tue, Sep 24, 2013 at 9:16 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2013-09-24 at 09:09 -0700, Pravin B Shelar wrote:
>> Use of RCU api makes vxlan code easier to understand.  It also
>> fixes bug due to missing ACCESS_ONCE() on sk_user_data dereference.
>> In rare case without ACCESS_ONCE() compiler might omit vs on
>> sk_user_data dereference. Compiler can use vs as alias for
>> sk->sk_user_data, resulting in multiple sk_user_data dereference
>> in rcu read context which could change.
>>
>> CC: Jesse Gross <jesse@nicira.com>
>> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
>> ---
>> v2:
>> Use RCU_INIT_POINTER
>> ---
>>  drivers/net/vxlan.c |   11 ++++-------
>>  include/net/sock.h  |    2 ++
>>  2 files changed, 6 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index d1292fe..1fae0d6 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -1,4 +1,4 @@
>> -/*
>> + /*
>>   * VXLAN: Virtual eXtensible Local Area Network
>>   *
>
>
> extra space ?
>
right, I missed that.

^ permalink raw reply

* [PATCH net-next v2] dev: always advertise rx_flags changes
From: Nicolas Dichtel @ 2013-09-24 16:21 UTC (permalink / raw)
  To: stephen; +Cc: netdev, davem, Nicolas Dichtel
In-Reply-To: <20130923122715.1d1b2005@nehalam.linuxnetplumber.net>

Depending on the code path, there is/isn't netlink message/call to notifier
chains when rx_flags are updated, let's send advertisement for all cases.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---

v2: rework the patch to avoid double notification

 net/core/dev.c | 64 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..067bfbe70c4c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4822,7 +4822,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
 		ops->ndo_change_rx_flags(dev, flags);
 }
 
-static int __dev_set_promiscuity(struct net_device *dev, int inc)
+static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notif)
 {
 	unsigned int old_flags = dev->flags;
 	kuid_t uid;
@@ -4865,6 +4865,10 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
 
 		dev_change_rx_flags(dev, IFF_PROMISC);
 	}
+	if (notif) {
+		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_PROMISC);
+	}
 	return 0;
 }
 
@@ -4884,7 +4888,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 	unsigned int old_flags = dev->flags;
 	int err;
 
-	err = __dev_set_promiscuity(dev, inc);
+	err = __dev_set_promiscuity(dev, inc, true);
 	if (err < 0)
 		return err;
 	if (dev->flags != old_flags)
@@ -4893,22 +4897,9 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 }
 EXPORT_SYMBOL(dev_set_promiscuity);
 
-/**
- *	dev_set_allmulti	- update allmulti count on a device
- *	@dev: device
- *	@inc: modifier
- *
- *	Add or remove reception of all multicast frames to a device. While the
- *	count in the device remains above zero the interface remains listening
- *	to all interfaces. Once it hits zero the device reverts back to normal
- *	filtering operation. A negative @inc value is used to drop the counter
- *	when releasing a resource needing all multicasts.
- *	Return 0 if successful or a negative errno code on error.
- */
-
-int dev_set_allmulti(struct net_device *dev, int inc)
+static int __dev_set_allmulti(struct net_device *dev, int inc, bool notif)
 {
-	unsigned int old_flags = dev->flags;
+	unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
 
 	ASSERT_RTNL();
 
@@ -4931,9 +4922,32 @@ int dev_set_allmulti(struct net_device *dev, int inc)
 	if (dev->flags ^ old_flags) {
 		dev_change_rx_flags(dev, IFF_ALLMULTI);
 		dev_set_rx_mode(dev);
+		if (notif) {
+			call_netdevice_notifiers(NETDEV_CHANGE, dev);
+			if (dev->gflags ^ old_gflags)
+				rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_ALLMULTI);
+		}
 	}
 	return 0;
 }
+
+/**
+ *	dev_set_allmulti	- update allmulti count on a device
+ *	@dev: device
+ *	@inc: modifier
+ *
+ *	Add or remove reception of all multicast frames to a device. While the
+ *	count in the device remains above zero the interface remains listening
+ *	to all interfaces. Once it hits zero the device reverts back to normal
+ *	filtering operation. A negative @inc value is used to drop the counter
+ *	when releasing a resource needing all multicasts.
+ *	Return 0 if successful or a negative errno code on error.
+ */
+
+int dev_set_allmulti(struct net_device *dev, int inc)
+{
+	return __dev_set_allmulti(dev, inc, true);
+}
 EXPORT_SYMBOL(dev_set_allmulti);
 
 /*
@@ -4958,10 +4972,10 @@ void __dev_set_rx_mode(struct net_device *dev)
 		 * therefore calling __dev_set_promiscuity here is safe.
 		 */
 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
-			__dev_set_promiscuity(dev, 1);
+			__dev_set_promiscuity(dev, 1, false);
 			dev->uc_promisc = true;
 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
-			__dev_set_promiscuity(dev, -1);
+			__dev_set_promiscuity(dev, -1, false);
 			dev->uc_promisc = false;
 		}
 	}
@@ -5050,9 +5064,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
 
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
+		unsigned int old_flags = dev->flags;
 
 		dev->gflags ^= IFF_PROMISC;
-		dev_set_promiscuity(dev, inc);
+
+		if (__dev_set_promiscuity(dev, inc, false) >= 0)
+			if (dev->flags != old_flags)
+				dev_set_rx_mode(dev);
 	}
 
 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
@@ -5063,7 +5081,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
 
 		dev->gflags ^= IFF_ALLMULTI;
-		dev_set_allmulti(dev, inc);
+		__dev_set_allmulti(dev, inc, false);
 	}
 
 	return ret;
@@ -5101,13 +5119,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
 int dev_change_flags(struct net_device *dev, unsigned int flags)
 {
 	int ret;
-	unsigned int changes, old_flags = dev->flags;
+	unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
 
 	ret = __dev_change_flags(dev, flags);
 	if (ret < 0)
 		return ret;
 
-	changes = old_flags ^ dev->flags;
+	changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
 	if (changes)
 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
-- 
1.8.2.1

^ permalink raw reply related

* Re: [PATCH net] vxlan: Use RCU apis to access sk_user_data.
From: Stephen Hemminger @ 2013-09-24 16:20 UTC (permalink / raw)
  To: Pravin B Shelar; +Cc: netdev, Jesse Gross
In-Reply-To: <1380000501-13969-1-git-send-email-pshelar@nicira.com>

On Mon, 23 Sep 2013 22:28:21 -0700
Pravin B Shelar <pshelar@nicira.com> wrote:

>  
> +#define sk_user_data_rcu(sk) ((*((void __rcu **)&(sk)->sk_user_data)))

I don't like the macro, is it just to keep sparse happy?
The name also implies that rcu_dereference() is built in.

^ permalink raw reply

* [PATCH 1/2] net: Add function to get SW rxhash
From: Tom Herbert @ 2013-09-24 16:20 UTC (permalink / raw)
  To: davem; +Cc: netdev

Some uses of skb_get_rxhash expect that the function will return
a consistent value whether it is called on RX or TX paths. On RX
path, we will use the rxhash if provided by the NIC, so this
would not normally be the same result computed in TX path which is
a software calculation.

This patch adds skb_get_sw_rxhash to explicitly request a hash
calculated by the stack, disregarding the hash provided by NIC.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/skbuff.h    | 11 ++++++++++-
 net/core/flow_dissector.c |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ddb48d..917a590 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -488,6 +488,7 @@ struct sk_buff {
 	__u8			pfmemalloc:1;
 	__u8			ooo_okay:1;
 	__u8			l4_rxhash:1;
+	__u8			sw_rxhash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
 	__u8			no_fcs:1;
@@ -498,7 +499,7 @@ struct sk_buff {
 	 * headers if needed
 	 */
 	__u8			encapsulation:1;
-	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
+	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -726,6 +727,14 @@ static inline __u32 skb_get_rxhash(struct sk_buff *skb)
 	return skb->rxhash;
 }
 
+static inline __u32 skb_get_sw_rxhash(struct sk_buff *skb)
+{
+	if (!skb->l4_rxhash && !skb->sw_rxhash)
+		__skb_get_rxhash(skb);
+
+	return skb->rxhash;
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1929af8..8979121 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -200,6 +200,7 @@ void __skb_get_rxhash(struct sk_buff *skb)
 		hash = 1;
 
 	skb->rxhash = hash;
+	skb->sw_rxhash = 1;
 }
 EXPORT_SYMBOL(__skb_get_rxhash);
 
-- 
1.8.4

^ permalink raw reply related

* Re: [PATCH net v2] vxlan: Use RCU apis to access sk_user_data.
From: Eric Dumazet @ 2013-09-24 16:16 UTC (permalink / raw)
  To: Pravin B Shelar; +Cc: netdev, Jesse Gross
In-Reply-To: <1380038979-24035-1-git-send-email-pshelar@nicira.com>

On Tue, 2013-09-24 at 09:09 -0700, Pravin B Shelar wrote:
> Use of RCU api makes vxlan code easier to understand.  It also
> fixes bug due to missing ACCESS_ONCE() on sk_user_data dereference.
> In rare case without ACCESS_ONCE() compiler might omit vs on
> sk_user_data dereference. Compiler can use vs as alias for
> sk->sk_user_data, resulting in multiple sk_user_data dereference
> in rcu read context which could change.
> 
> CC: Jesse Gross <jesse@nicira.com>
> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
> ---
> v2:
> Use RCU_INIT_POINTER
> ---
>  drivers/net/vxlan.c |   11 ++++-------
>  include/net/sock.h  |    2 ++
>  2 files changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index d1292fe..1fae0d6 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1,4 +1,4 @@
> -/*
> + /*
>   * VXLAN: Virtual eXtensible Local Area Network
>   *


extra space ?

^ permalink raw reply

* Re: [PATCH 1/2] net: Toeplitz library functions
From: Eric Dumazet @ 2013-09-24 16:14 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg
In-Reply-To: <20130924160145.GB26769@order.stressinduktion.org>

On Tue, 2013-09-24 at 18:01 +0200, Hannes Frederic Sowa wrote:

> inet(6)_ehashfn does get used by udp btw. But I do not understand the code,
> yet. ;)

Oh well, thats the SO_REUSEPORT thing. We do not really care, thats only
used to try to steer udp flows on a given socket, but its a hint.

Presumably it should use a separate secret key.

^ permalink raw reply

* Re: [PATCH net v4 1/1] xen-netback: Handle backend state transitions in a more robust way
From: Wei Liu @ 2013-09-24 16:12 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, netdev, Ian Campbell, Wei Liu, David Vrabel
In-Reply-To: <1380037500-12174-2-git-send-email-paul.durrant@citrix.com>

You forgot to fix those typos -- metback, transtions. :-)

On Tue, Sep 24, 2013 at 04:45:00PM +0100, Paul Durrant wrote:
> When the frontend state changes metback now specifies its desired state to

metback

[...]
> + * The backend state starts in InitWait and the following transtions are

transtions

Wei.

^ permalink raw reply

* Re: [PATCH 1/2] net: Toeplitz library functions
From: Eric Dumazet @ 2013-09-24 16:10 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, David Laight, Linux Netdev List, Brandeburg, Jesse
In-Reply-To: <CA+mtBx_AcmcNBDhRKVB8YDkSNdOwgJ3CXxBqpgxDw23X3m2rvQ@mail.gmail.com>

On Tue, 2013-09-24 at 08:54 -0700, Tom Herbert wrote:

> The Toeplitz function uses a secret key whose length is based on the
> input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
> attacker can reproduce this if the key is random.  If the problem is
> that devices are not being configured with a sufficiently random key
> (some actually are using a fixed key :-( ), that's a separate issue
> that should be addressed.  It is possible to DoS attack through the
> steering mechanism.

Well, your patch would make sense [1] only if we could use hardware
assistance, but right now we have no idea of how safe are the existing
assistances.

[1] Computing Toeplitz in software is way more expensive than jhash.

Dos attack is quite simple right now, even without knowing if the target
uses or not steering.

^ permalink raw reply

* [PATCH net v2] vxlan: Use RCU apis to access sk_user_data.
From: Pravin B Shelar @ 2013-09-24 16:09 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, Pravin B Shelar, Jesse Gross

Use of RCU api makes vxlan code easier to understand.  It also
fixes bug due to missing ACCESS_ONCE() on sk_user_data dereference.
In rare case without ACCESS_ONCE() compiler might omit vs on
sk_user_data dereference. Compiler can use vs as alias for
sk->sk_user_data, resulting in multiple sk_user_data dereference
in rcu read context which could change.

CC: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
v2:
Use RCU_INIT_POINTER
---
 drivers/net/vxlan.c |   11 ++++-------
 include/net/sock.h  |    2 ++
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index d1292fe..1fae0d6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1,4 +1,4 @@
-/*
+ /*
  * VXLAN: Virtual eXtensible Local Area Network
  *
  * Copyright (c) 2012-2013 Vyatta Inc.
@@ -952,8 +952,7 @@ void vxlan_sock_release(struct vxlan_sock *vs)
 
 	spin_lock(&vn->sock_lock);
 	hlist_del_rcu(&vs->hlist);
-	smp_wmb();
-	vs->sock->sk->sk_user_data = NULL;
+	RCU_INIT_POINTER(sk_user_data_rcu(vs->sock->sk), NULL);
 	vxlan_notify_del_rx_port(sk);
 	spin_unlock(&vn->sock_lock);
 
@@ -1048,8 +1047,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	port = inet_sk(sk)->inet_sport;
 
-	smp_read_barrier_depends();
-	vs = (struct vxlan_sock *)sk->sk_user_data;
+	vs = rcu_dereference(sk_user_data_rcu(sk));
 	if (!vs)
 		goto drop;
 
@@ -2302,8 +2300,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 	atomic_set(&vs->refcnt, 1);
 	vs->rcv = rcv;
 	vs->data = data;
-	smp_wmb();
-	vs->sock->sk->sk_user_data = vs;
+	rcu_assign_pointer(sk_user_data_rcu(vs->sock->sk), vs);
 
 	spin_lock(&vn->sock_lock);
 	hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
diff --git a/include/net/sock.h b/include/net/sock.h
index 6ba2e7b..ffd1356 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -409,6 +409,8 @@ struct sock {
 	void                    (*sk_destruct)(struct sock *sk);
 };
 
+#define sk_user_data_rcu(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+
 /*
  * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
  * or not whether his port will be reused by someone else. SK_FORCE_REUSE
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH 1/2] net: Toeplitz library functions
From: Hannes Frederic Sowa @ 2013-09-24 16:01 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg
In-Reply-To: <1380001118.3165.41.camel@edumazet-glaptop>

On Mon, Sep 23, 2013 at 10:38:38PM -0700, Eric Dumazet wrote:
> On Tue, 2013-09-24 at 05:35 +0200, Hannes Frederic Sowa wrote:
> 
> > > build_ehash_secret builds up the data which seeds fragmentation ids, ephermal
> > > port randomization etc. Could we drop the check of sock->type? I guess the
> > > idea was that in-kernel sockets of type raw/udp do not seed the keys when no
> > > entropy is available?
> > 
> > Would this be better (I checked inet_ehash_secret, ipv6_hash_secret
> > and net_secret to actual get initialized)?
> > 
> 
> inet_ehash_secret is used only to make jhash() for tcp ehash, not for
> fragmentation ids or other uses (port randomization).

inet(6)_ehashfn does get used by udp btw. But I do not understand the code,
yet. ;)

^ permalink raw reply

* Re: [PATCH 1/2] net: Toeplitz library functions
From: Hannes Frederic Sowa @ 2013-09-24 16:00 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, David Laight, Linux Netdev List, Brandeburg, Jesse
In-Reply-To: <CA+mtBx_AcmcNBDhRKVB8YDkSNdOwgJ3CXxBqpgxDw23X3m2rvQ@mail.gmail.com>

On Tue, Sep 24, 2013 at 08:54:24AM -0700, Tom Herbert wrote:
> On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
> > From: Tom Herbert <therbert@google.com>
> > Date: Tue, 24 Sep 2013 08:22:55 -0700
> >
> >> We use this value for steering, and could use it for other uses like
> >> connection lookup.
> >
> > For security reasons we absolutely cannot use it for that purpose,
> > please stop claiming this.
> >
> > Any hash function which an attacker can reproduce is attackable.
> 
> The Toeplitz function uses a secret key whose length is based on the
> input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
> attacker can reproduce this if the key is random.  If the problem is
> that devices are not being configured with a sufficiently random key
> (some actually are using a fixed key :-( ), that's a separate issue
> that should be addressed.  It is possible to DoS attack through the
> steering mechanism.

I agree, my first comment on the second patch was wrong. I did not assume that
the hashing function does seed itself. We also do not rehash the connection
tables. So if Eric's comments would be addressed its use could be fine.

^ permalink raw reply

* Re: [PATCH 1/2] net: Toeplitz library functions
From: Tom Herbert @ 2013-09-24 15:54 UTC (permalink / raw)
  To: David Miller; +Cc: David Laight, Linux Netdev List, Brandeburg, Jesse
In-Reply-To: <20130924.113953.1275344954032811572.davem@redhat.com>

On Tue, Sep 24, 2013 at 8:39 AM, David Miller <davem@redhat.com> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Tue, 24 Sep 2013 08:22:55 -0700
>
>> We use this value for steering, and could use it for other uses like
>> connection lookup.
>
> For security reasons we absolutely cannot use it for that purpose,
> please stop claiming this.
>
> Any hash function which an attacker can reproduce is attackable.

The Toeplitz function uses a secret key whose length is based on the
input length.  96 bits in IPv4, 320 bits in IPv6.  I don't see how an
attacker can reproduce this if the key is random.  If the problem is
that devices are not being configured with a sufficiently random key
(some actually are using a fixed key :-( ), that's a separate issue
that should be addressed.  It is possible to DoS attack through the
steering mechanism.

Tom

^ permalink raw reply

* Re: [PATCH] net: net_secret should not depend on TCP
From: Eric Dumazet @ 2013-09-24 15:46 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Tom Herbert, davem, netdev, jesse.brandeburg
In-Reply-To: <20130924152858.GB1527@order.stressinduktion.org>

On Tue, 2013-09-24 at 17:28 +0200, Hannes Frederic Sowa wrote:

> But couldn't it be that get_random_bytes always returns 0 and we won't make
> any progress here. Does the reseed happen from irq context or from softirqs? I
> always thought it would be from a softirq (which could be blocked).

Really if get_random_bytes() could return 0 forever I think we would
have a big problem with its implementation and documentation :

/*
 * This function is the exported kernel interface.  It returns some
 * number of good random numbers, suitable for key generation, seeding
 * TCP sequence numbers, etc.  It does not use the hw random number
 * generator, if available; use get_random_bytes_arch() for that.
 */
void get_random_bytes(void *buf, int nbytes)
{
        extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);

^ permalink raw reply

* [PATCH net v4 1/1] xen-netback: Handle backend state transitions in a more robust way
From: Paul Durrant @ 2013-09-24 15:45 UTC (permalink / raw)
  To: xen-devel, netdev; +Cc: Paul Durrant, Ian Campbell, Wei Liu, David Vrabel
In-Reply-To: <1380037500-12174-1-git-send-email-paul.durrant@citrix.com>

When the frontend state changes metback now specifies its desired state to
a new function, set_backend_state(), which transitions through any
necessary intermediate states.
This fixes an issue observed with some old Windows frontend drivers where
they failed to transition through the Closing state and netback would not
behave correctly.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
---
 drivers/net/xen-netback/xenbus.c |  143 ++++++++++++++++++++++++++++++--------
 1 file changed, 113 insertions(+), 30 deletions(-)

diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index a53782e..f311c38 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -24,6 +24,7 @@
 struct backend_info {
 	struct xenbus_device *dev;
 	struct xenvif *vif;
+	enum xenbus_state state;
 	enum xenbus_state frontend_state;
 	struct xenbus_watch hotplug_status_watch;
 	u8 have_hotplug_status_watch:1;
@@ -136,6 +137,8 @@ static int netback_probe(struct xenbus_device *dev,
 	if (err)
 		goto fail;
 
+	be->state = XenbusStateInitWait;
+
 	/* This kicks hotplug scripts, so do it immediately. */
 	backend_create_xenvif(be);
 
@@ -208,24 +211,113 @@ static void backend_create_xenvif(struct backend_info *be)
 	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
 }
 
-
-static void disconnect_backend(struct xenbus_device *dev)
+static void backend_disconnect(struct backend_info *be)
 {
-	struct backend_info *be = dev_get_drvdata(&dev->dev);
-
 	if (be->vif)
 		xenvif_disconnect(be->vif);
 }
 
-static void destroy_backend(struct xenbus_device *dev)
+static void backend_connect(struct backend_info *be)
 {
-	struct backend_info *be = dev_get_drvdata(&dev->dev);
+	if (be->vif)
+		connect(be);
+}
 
-	if (be->vif) {
-		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-		xenvif_free(be->vif);
-		be->vif = NULL;
+static inline void backend_switch_state(struct backend_info *be,
+					enum xenbus_state state)
+{
+	struct xenbus_device *dev = be->dev;
+
+	pr_debug("%s -> %s\n", dev->nodename, xenbus_strstate(state));
+	be->state = state;
+
+	/* If we are waiting for a hotplug script then defer the
+	 * actual xenbus state change.
+	 */
+	if (!be->have_hotplug_status_watch)
+		xenbus_switch_state(dev, state);
+}
+
+/* Handle backend state transitions:
+ *
+ * The backend state starts in InitWait and the following transtions are
+ * allowed.
+ *
+ * InitWait -> Connected
+ *
+ *    ^    \         |
+ *    |     \        |
+ *    |      \       |
+ *    |       \      |
+ *    |        \     |
+ *    |         \    |
+ *    |          V   V
+ *
+ *  Closed  <-> Closing
+ *
+ * The state argument specifies the eventual state of the backend and the
+ * function transitions to that state via the shortest path.
+ */
+static void set_backend_state(struct backend_info *be,
+			      enum xenbus_state state)
+{
+	while (be->state != state) {
+		switch (be->state) {
+		case XenbusStateClosed:
+			switch (state) {
+			case XenbusStateInitWait:
+			case XenbusStateConnected:
+				pr_info("%s: prepare for reconnect\n",
+					be->dev->nodename);
+				backend_switch_state(be, XenbusStateInitWait);
+				break;
+			case XenbusStateClosing:
+				backend_switch_state(be, XenbusStateClosing);
+				break;
+			default:
+				BUG();
+			}
+			break;
+		case XenbusStateInitWait:
+			switch (state) {
+			case XenbusStateConnected:
+				backend_connect(be);
+				backend_switch_state(be, XenbusStateConnected);
+				break;
+			case XenbusStateClosing:
+			case XenbusStateClosed:
+				backend_switch_state(be, XenbusStateClosing);
+				break;
+			default:
+				BUG();
+			}
+			break;
+		case XenbusStateConnected:
+			switch (state) {
+			case XenbusStateInitWait:
+			case XenbusStateClosing:
+			case XenbusStateClosed:
+				backend_disconnect(be);
+				backend_switch_state(be, XenbusStateClosing);
+				break;
+			default:
+				BUG();
+			}
+			break;
+		case XenbusStateClosing:
+			switch (state) {
+			case XenbusStateInitWait:
+			case XenbusStateConnected:
+			case XenbusStateClosed:
+				backend_switch_state(be, XenbusStateClosed);
+				break;
+			default:
+				BUG();
+			}
+			break;
+		default:
+			BUG();
+		}
 	}
 }
 
@@ -237,40 +329,33 @@ static void frontend_changed(struct xenbus_device *dev,
 {
 	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
-	pr_debug("frontend state %s\n", xenbus_strstate(frontend_state));
+	pr_debug("%s -> %s\n", dev->otherend, xenbus_strstate(frontend_state));
 
 	be->frontend_state = frontend_state;
 
 	switch (frontend_state) {
 	case XenbusStateInitialising:
-		if (dev->state == XenbusStateClosed) {
-			pr_info("%s: prepare for reconnect\n", dev->nodename);
-			xenbus_switch_state(dev, XenbusStateInitWait);
-		}
+		set_backend_state(be, XenbusStateInitWait);
 		break;
 
 	case XenbusStateInitialised:
 		break;
 
 	case XenbusStateConnected:
-		if (dev->state == XenbusStateConnected)
-			break;
-		if (be->vif)
-			connect(be);
+		set_backend_state(be, XenbusStateConnected);
 		break;
 
 	case XenbusStateClosing:
-		disconnect_backend(dev);
-		xenbus_switch_state(dev, XenbusStateClosing);
+		set_backend_state(be, XenbusStateClosing);
 		break;
 
 	case XenbusStateClosed:
-		xenbus_switch_state(dev, XenbusStateClosed);
+		set_backend_state(be, XenbusStateClosed);
 		if (xenbus_dev_is_online(dev))
 			break;
-		destroy_backend(dev);
 		/* fall through if not online */
 	case XenbusStateUnknown:
+		set_backend_state(be, XenbusStateClosed);
 		device_unregister(&dev->dev);
 		break;
 
@@ -363,7 +448,9 @@ static void hotplug_status_changed(struct xenbus_watch *watch,
 	if (IS_ERR(str))
 		return;
 	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
-		xenbus_switch_state(be->dev, XenbusStateConnected);
+		/* Complete any pending state change */
+		xenbus_switch_state(be->dev, be->state);
+
 		/* Not interested in this watch anymore. */
 		unregister_hotplug_status_watch(be);
 	}
@@ -393,12 +480,8 @@ static void connect(struct backend_info *be)
 	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
 				   hotplug_status_changed,
 				   "%s/%s", dev->nodename, "hotplug-status");
-	if (err) {
-		/* Switch now, since we can't do a watch. */
-		xenbus_switch_state(dev, XenbusStateConnected);
-	} else {
+	if (!err)
 		be->have_hotplug_status_watch = 1;
-	}
 
 	netif_wake_queue(be->vif->dev);
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net v4 0/1] xen-netback: windows frontend compatibility fixes
From: Paul Durrant @ 2013-09-24 15:44 UTC (permalink / raw)
  To: xen-devel, netdev

The following patches fix a couple more issues found when testing with
Windows frontends.

v4:
- Fix problem with hotplug failure noticed by Wei Liu

v3:
- Collapse both v2 patches into a single patch that introduces a new
  function to handle backend state transtions. By doing this we ensure that
  we always transition through intermediate states and that we don't attempt
  repeated connects or disconnects.

v2:
- Add comment in 2/2 to note that state transitions from Connected to Closed
  are incorrect.

^ permalink raw reply

* Re: [PATCH] ipv6: udp packets following an UFO enqueued packet need also be handled by UFO
From: David Miller @ 2013-09-24 15:43 UTC (permalink / raw)
  To: hannes; +Cc: netdev, yoshfuji
In-Reply-To: <20130921042700.GB8070@order.stressinduktion.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 21 Sep 2013 06:27:00 +0200

> In the following scenario the socket is corked:
> If the first UDP packet is larger then the mtu we try to append it to the
> write queue via ip6_ufo_append_data. A following packet, which is smaller
> than the mtu would be appended to the already queued up gso-skb via
> plain ip6_append_data. This causes random memory corruptions.
> 
> In ip6_ufo_append_data we also have to be careful to not queue up the
> same skb multiple times. So setup the gso frame only when no first skb
> is available.
> 
> This also fixes a shortcoming where we add the current packet's length to
> cork->length but return early because of a packet > mtu with dontfrag set
> (instead of sutracting it again).
> 
> Found with trinity.
> 
> Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH 1/2] net: Toeplitz library functions
From: David Miller @ 2013-09-24 15:39 UTC (permalink / raw)
  To: therbert; +Cc: David.Laight, netdev, jesse.brandeburg
In-Reply-To: <CA+mtBx_y0hK69jcdY5e0MUzAa8hEPwExBDuK9Dn4Ceo5Hkn_iw@mail.gmail.com>

From: Tom Herbert <therbert@google.com>
Date: Tue, 24 Sep 2013 08:22:55 -0700

> We use this value for steering, and could use it for other uses like
> connection lookup.

For security reasons we absolutely cannot use it for that purpose,
please stop claiming this.

Any hash function which an attacker can reproduce is attackable.

^ permalink raw reply

* RE: [PATCH net v3 1/1] xen-netback: Handle backend state transitions in a more robust way
From: Paul Durrant @ 2013-09-24 15:33 UTC (permalink / raw)
  To: Wei Liu
  Cc: xen-devel@lists.xen.org, netdev@vger.kernel.org, Ian Campbell,
	Wei Liu, David Vrabel
In-Reply-To: <20130924152611.GA14211@zion.uk.xensource.com>

> -----Original Message-----
> From: Paul Durrant
> Sent: 24 September 2013 16:31
> To: Wei Liu
> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Ian Campbell; Wei Liu;
> David Vrabel
> Subject: RE: [PATCH net v3 1/1] xen-netback: Handle backend state
> transitions in a more robust way
> 
> > -----Original Message-----
> > From: Wei Liu [mailto:wei.liu2@citrix.com]
> > Sent: 24 September 2013 16:26
> > To: Paul Durrant
> > Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Ian Campbell; Wei
> Liu;
> > David Vrabel
> > Subject: Re: [PATCH net v3 1/1] xen-netback: Handle backend state
> > transitions in a more robust way
> >
> > On Tue, Sep 24, 2013 at 03:51:22PM +0100, Paul Durrant wrote:
> > > When the frontend state changes metback now specifies its desired state
> > to
> >                                   netback
> > > a new function, set_backend_state(), which transitions through any
> > [...]
> > > +/* Handle backend state transitions:
> > > + *
> > > + * The backend state starts in InitWait and the following transtions are
> >                                                              transitions
> > > + * allowed.
> > >
> > [...]
> > > @@ -363,7 +448,9 @@ static void hotplug_status_changed(struct
> > xenbus_watch *watch,
> > >  	if (IS_ERR(str))
> > >  		return;
> > >  	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len))
> > {
> > > -		xenbus_switch_state(be->dev, XenbusStateConnected);
> > > +		/* Complete any pending state change */
> > > +		xenbus_switch_state(be->dev, be->state);
> > > +
> >
> > The state transition takes place iff hotplug status is "connected", is
> > this desirable? What if hotplug fails?
> >
> 
> The xenbus state will remain in InitWait but the backend state will be
> Connected.
> 
> > If it cycles through connect again it looks like it will trigger that
> > BUG_ON in connect()?
> >
> 
> No, I don't think so because be->state is not the same as dev->state. The
> frontend can go to Closing/Closed (which will take dev->state to
> Closing/Closed) and this should be fine.
> 

Sorry, I misread. You're right. I'll fix.

  Paul

^ permalink raw reply

* RE: [PATCH net v3 1/1] xen-netback: Handle backend state transitions in a more robust way
From: Paul Durrant @ 2013-09-24 15:30 UTC (permalink / raw)
  To: Wei Liu
  Cc: xen-devel@lists.xen.org, netdev@vger.kernel.org, Ian Campbell,
	Wei Liu, David Vrabel
In-Reply-To: <20130924152611.GA14211@zion.uk.xensource.com>

> -----Original Message-----
> From: Wei Liu [mailto:wei.liu2@citrix.com]
> Sent: 24 September 2013 16:26
> To: Paul Durrant
> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Ian Campbell; Wei Liu;
> David Vrabel
> Subject: Re: [PATCH net v3 1/1] xen-netback: Handle backend state
> transitions in a more robust way
> 
> On Tue, Sep 24, 2013 at 03:51:22PM +0100, Paul Durrant wrote:
> > When the frontend state changes metback now specifies its desired state
> to
>                                   netback
> > a new function, set_backend_state(), which transitions through any
> [...]
> > +/* Handle backend state transitions:
> > + *
> > + * The backend state starts in InitWait and the following transtions are
>                                                              transitions
> > + * allowed.
> >
> [...]
> > @@ -363,7 +448,9 @@ static void hotplug_status_changed(struct
> xenbus_watch *watch,
> >  	if (IS_ERR(str))
> >  		return;
> >  	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len))
> {
> > -		xenbus_switch_state(be->dev, XenbusStateConnected);
> > +		/* Complete any pending state change */
> > +		xenbus_switch_state(be->dev, be->state);
> > +
> 
> The state transition takes place iff hotplug status is "connected", is
> this desirable? What if hotplug fails?
> 

The xenbus state will remain in InitWait but the backend state will be Connected.

> If it cycles through connect again it looks like it will trigger that
> BUG_ON in connect()?
> 

No, I don't think so because be->state is not the same as dev->state. The frontend can go to Closing/Closed (which will take dev->state to Closing/Closed) and this should be fine.

  Paul

^ permalink raw reply

* Re: question tehuti.c
From: Andy Gospodarek @ 2013-09-24 15:34 UTC (permalink / raw)
  To: Julia Lawall; +Cc: netdev, linux-kernel, Alexander Indenbaum
In-Reply-To: <alpine.DEB.2.02.1309231752040.2242@hadrien>

On Mon, Sep 23, 2013 at 05:54:00PM +0200, Julia Lawall wrote:
> The function bdx_setmulti in the file drivers/net/ethernet/tehuti/tehuti.c
> contains:
> 
>         u32 rxf_val =
>             GMAC_RX_FILTER_AM | GMAC_RX_FILTER_AB | GMAC_RX_FILTER_OSEN;
> 
> and then later:
> 
>        } else {
>                 DBG("only own mac %d\n", netdev_mc_count(ndev));
>                 rxf_val |= GMAC_RX_FILTER_AB;
>         }
> 
> The last assignment doesn't look very useful, because GMAC_RX_FILTER_ABis
> already included in rxf_val.  Was something else intended?

You are correct that the setting of GMAC_RX_FILTER_AB is a bit redundant
since it was set earlier in the function.

I have not worked on this driver in several years and if there is no
response from Alexander or someone else at Tehuti Networks then you may
not get an answer to your question. :-/

^ permalink raw reply

* Re: [PATCH net-next] ipv6: do not allow ipv6 module to be removed
From: David Miller @ 2013-09-24 15:32 UTC (permalink / raw)
  To: amwang; +Cc: netdev, yoshfuji, stephen
In-Reply-To: <1379733141-10643-1-git-send-email-amwang@redhat.com>

From: Cong Wang <amwang@redhat.com>
Date: Sat, 21 Sep 2013 11:12:21 +0800

> From: Cong Wang <amwang@redhat.com>
> 
> There was some bug report on ipv6 module removal path before.
> Also, as Stephen pointed out, after vxlan module gets ipv6 support,
> the ipv6 stub it used is not safe against this module removal either.
> So, let's just remove inet6_exit() so that ipv6 module will not be
> able to be unloaded.
> 
> Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <amwang@redhat.com>

Fair enough, applied.  We can always revert this later if we ever
make ipv6 properly unloadable.

^ permalink raw reply

* Re: [PATCH 1/2] net: Toeplitz library functions
From: Eric Dumazet @ 2013-09-24 15:29 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Laight, David Miller, Linux Netdev List, Brandeburg, Jesse
In-Reply-To: <CA+mtBx_y0hK69jcdY5e0MUzAa8hEPwExBDuK9Dn4Ceo5Hkn_iw@mail.gmail.com>

On Tue, 2013-09-24 at 08:22 -0700, Tom Herbert wrote:

> Assuming skb_rx_hash does symmetric calculation is currently
> incorrect.  For instance, looks like tun.c is trying to implement a
> sort of 'flow director' logic to pair TX queues and RX queues using
> skb_get_rxhash an expecting that the value is calculated
> symmetrically.  If HW is providing RX hash, this is broken and we'll
> never match the flows.  We could either recompute the hash in SW or
> try to match HW hash.

Its not incorrect, its an implementation choice.

Its software in linux, we do not have to care of how its done in
hardware.

This is done to reduce conntracking cost, in case RPS is used on a
router : Same cpu will process frames in both ways.

But conntracking does not 'rely' on rxhash being symmetric, thats an
optimization to have better data locality.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox