Netdev List

Netdev List
 help / color / mirror / Atom feed

* patch kobject-send-hotplug-events-in-all-network-namespaces.patch added to gregkh-2.6 tree
From: gregkh @ 2010-05-20 18:10 UTC (permalink / raw)
  To: ebiederm, bcrl, cornelia.huck, davem, eric.dumazet, gregkh,
	kay.sievers, netdev, serue
In-Reply-To: <1273019809-16472-1-git-send-email-ebiederm@xmission.com>


This is a note to let you know that I've just added the patch titled

    Subject: kobject: Send hotplug events in all network namespaces

to my gregkh-2.6 tree.  Its filename is

    kobject-send-hotplug-events-in-all-network-namespaces.patch

This tree can be found at 
    http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/


>From ebiederm@xmission.com  Thu May 20 10:40:26 2010
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue,  4 May 2010 17:36:44 -0700
Subject: kobject: Send hotplug events in all network namespaces
To: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>, linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>, Cornelia Huck <cornelia.huck@de.ibm.com>, Eric Dumazet <eric.dumazet@gmail.com>, Benjamin LaHaise <bcrl@lhnet.ca>, Serge Hallyn <serue@us.ibm.com>, <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>, "Eric W. Biederman" <ebiederm@xmission.com>
Message-ID: <1273019809-16472-1-git-send-email-ebiederm@xmission.com>


From: Eric W. Biederman <ebiederm@xmission.com>

Open a copy of the uevent kernel socket in each network
namespace so we can send uevents in all network namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 lib/kobject_uevent.c |   68 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 8 deletions(-)

--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -24,13 +24,19 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 
 
 u64 uevent_seqnum;
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 static DEFINE_SPINLOCK(sequence_lock);
-#if defined(CONFIG_NET)
-static struct sock *uevent_sock;
+#ifdef CONFIG_NET
+struct uevent_sock {
+	struct list_head list;
+	struct sock *sk;
+};
+static LIST_HEAD(uevent_sock_list);
+static DEFINE_MUTEX(uevent_sock_mutex);
 #endif
 
 /* the strings here must match the enum in include/linux/kobject.h */
@@ -100,6 +106,9 @@ int kobject_uevent_env(struct kobject *k
 	u64 seq;
 	int i = 0;
 	int retval = 0;
+#ifdef CONFIG_NET
+	struct uevent_sock *ue_sk;
+#endif
 
 	pr_debug("kobject: '%s' (%p): %s\n",
 		 kobject_name(kobj), kobj, __func__);
@@ -211,7 +220,9 @@ int kobject_uevent_env(struct kobject *k
 
 #if defined(CONFIG_NET)
 	/* send netlink message */
-	if (uevent_sock) {
+	mutex_lock(&uevent_sock_mutex);
+	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+		struct sock *uevent_sock = ue_sk->sk;
 		struct sk_buff *skb;
 		size_t len;
 
@@ -241,6 +252,7 @@ int kobject_uevent_env(struct kobject *k
 		} else
 			retval = -ENOMEM;
 	}
+	mutex_unlock(&uevent_sock_mutex);
 #endif
 
 	/* call uevent_helper, usually only enabled during early boot */
@@ -320,18 +332,58 @@ int add_uevent_var(struct kobj_uevent_en
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
-static int __init kobject_uevent_init(void)
+static int uevent_net_init(struct net *net)
 {
-	uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
-					    1, NULL, NULL, THIS_MODULE);
-	if (!uevent_sock) {
+	struct uevent_sock *ue_sk;
+
+	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
+	if (!ue_sk)
+		return -ENOMEM;
+
+	ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
+					  1, NULL, NULL, THIS_MODULE);
+	if (!ue_sk->sk) {
 		printk(KERN_ERR
 		       "kobject_uevent: unable to create netlink socket!\n");
 		return -ENODEV;
 	}
-	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+	mutex_lock(&uevent_sock_mutex);
+	list_add_tail(&ue_sk->list, &uevent_sock_list);
+	mutex_unlock(&uevent_sock_mutex);
 	return 0;
 }
 
+static void uevent_net_exit(struct net *net)
+{
+	struct uevent_sock *ue_sk;
+
+	mutex_lock(&uevent_sock_mutex);
+	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+		if (sock_net(ue_sk->sk) == net)
+			goto found;
+	}
+	mutex_unlock(&uevent_sock_mutex);
+	return;
+
+found:
+	list_del(&ue_sk->list);
+	mutex_unlock(&uevent_sock_mutex);
+
+	netlink_kernel_release(ue_sk->sk);
+	kfree(ue_sk);
+}
+
+static struct pernet_operations uevent_net_ops = {
+	.init	= uevent_net_init,
+	.exit	= uevent_net_exit,
+};
+
+static int __init kobject_uevent_init(void)
+{
+	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+	return register_pernet_subsys(&uevent_net_ops);
+}
+
+
 postcore_initcall(kobject_uevent_init);
 #endif


^ permalink raw reply

* patch kobj-send-hotplug-events-in-the-proper-namespace.patch added to gregkh-2.6 tree
From: gregkh @ 2010-05-20 18:10 UTC (permalink / raw)
  To: ebiederm, bcrl, cornelia.huck, davem, eric.dumazet, gregkh,
	kay.sievers, netdev, serue
In-Reply-To: <1273019809-16472-4-git-send-email-ebiederm@xmission.com>


This is a note to let you know that I've just added the patch titled

    Subject: kobj: Send hotplug events in the proper namespace.

to my gregkh-2.6 tree.  Its filename is

    kobj-send-hotplug-events-in-the-proper-namespace.patch

This tree can be found at 
    http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/


>From ebiederm@xmission.com  Thu May 20 10:44:38 2010
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue,  4 May 2010 17:36:47 -0700
Subject: kobj: Send hotplug events in the proper namespace.
To: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>, linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>, Cornelia Huck <cornelia.huck@de.ibm.com>, Eric Dumazet <eric.dumazet@gmail.com>, Benjamin LaHaise <bcrl@lhnet.ca>, Serge Hallyn <serue@us.ibm.com>, <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>, "Eric W. Biederman" <ebiederm@xmission.com>
Message-ID: <1273019809-16472-4-git-send-email-ebiederm@xmission.com>


From: Eric W. Biederman <ebiederm@xmission.com>

Utilize netlink_broadcast_filtered to allow sending hotplug events
in the proper namespace.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 lib/kobject_uevent.c |   22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -83,6 +83,22 @@ out:
 	return ret;
 }
 
+static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+	struct kobject *kobj = data;
+	const struct kobj_ns_type_operations *ops;
+
+	ops = kobj_ns_ops(kobj);
+	if (ops) {
+		const void *sock_ns, *ns;
+		ns = kobj->ktype->namespace(kobj);
+		sock_ns = ops->netlink_ns(dsk);
+		return sock_ns != ns;
+	}
+
+	return 0;
+}
+
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
@@ -244,8 +260,10 @@ int kobject_uevent_env(struct kobject *k
 			}
 
 			NETLINK_CB(skb).dst_group = 1;
-			retval = netlink_broadcast(uevent_sock, skb, 0, 1,
-						   GFP_KERNEL);
+			retval = netlink_broadcast_filtered(uevent_sock, skb,
+							    0, 1, GFP_KERNEL,
+							    kobj_bcast_filter,
+							    kobj);
 			/* ENOBUFS should be handled in userspace */
 			if (retval == -ENOBUFS)
 				retval = 0;


^ permalink raw reply

* patch hotplug-netns-aware-uevent_helper.patch added to gregkh-2.6 tree
From: gregkh @ 2010-05-20 18:10 UTC (permalink / raw)
  To: ebiederm, bcrl, cornelia.huck, davem, eric.dumazet, gregkh,
	kay.sievers, netdev, serue
In-Reply-To: <1273019809-16472-5-git-send-email-ebiederm@xmission.com>


This is a note to let you know that I've just added the patch titled

    Subject: hotplug: netns aware uevent_helper

to my gregkh-2.6 tree.  Its filename is

    hotplug-netns-aware-uevent_helper.patch

This tree can be found at 
    http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/


>From ebiederm@xmission.com  Thu May 20 10:45:13 2010
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue,  4 May 2010 17:36:48 -0700
Subject: hotplug: netns aware uevent_helper
To: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>, linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>, Cornelia Huck <cornelia.huck@de.ibm.com>, Eric Dumazet <eric.dumazet@gmail.com>, Benjamin LaHaise <bcrl@lhnet.ca>, Serge Hallyn <serue@us.ibm.com>, <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>, "Eric W. Biederman" <ebiederm@xmission.com>
Message-ID: <1273019809-16472-5-git-send-email-ebiederm@xmission.com>


From: Eric W. Biederman <ebiederm@xmission.com>

It only makes sense for uevent_helper to get events
in the initial namespaces.  Its invocation is not
per namespace and it is not clear how we could make
its invocation namespace aware.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 lib/kobject_uevent.c |   19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -19,7 +19,7 @@
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-
+#include <linux/user_namespace.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
@@ -99,6 +99,21 @@ static int kobj_bcast_filter(struct sock
 	return 0;
 }
 
+static int kobj_usermode_filter(struct kobject *kobj)
+{
+	const struct kobj_ns_type_operations *ops;
+
+	ops = kobj_ns_ops(kobj);
+	if (ops) {
+		const void *init_ns, *ns;
+		ns = kobj->ktype->namespace(kobj);
+		init_ns = ops->initial_ns();
+		return ns != init_ns;
+	}
+
+	return 0;
+}
+
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
@@ -274,7 +289,7 @@ int kobject_uevent_env(struct kobject *k
 #endif
 
 	/* call uevent_helper, usually only enabled during early boot */
-	if (uevent_helper[0]) {
+	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
 		char *argv [3];
 
 		argv [0] = uevent_helper;


^ permalink raw reply

* patch net-expose-all-network-devices-in-a-namespaces-in-sysfs.patch added to gregkh-2.6 tree
From: gregkh @ 2010-05-20 18:10 UTC (permalink / raw)
  To: ebiederm, bcrl, cornelia.huck, davem, eric.dumazet, gregkh,
	kay.sievers, netdev, serue
In-Reply-To: <1273019809-16472-6-git-send-email-ebiederm@xmission.com>


This is a note to let you know that I've just added the patch titled

    Subject: net: Expose all network devices in a namespaces in sysfs

to my gregkh-2.6 tree.  Its filename is

    net-expose-all-network-devices-in-a-namespaces-in-sysfs.patch

This tree can be found at 
    http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/


>From ebiederm@xmission.com  Thu May 20 10:46:13 2010
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue,  4 May 2010 17:36:49 -0700
Subject: net: Expose all network devices in a namespaces in sysfs
To: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>, linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>, Cornelia Huck <cornelia.huck@de.ibm.com>, Eric Dumazet <eric.dumazet@gmail.com>, Benjamin LaHaise <bcrl@lhnet.ca>, Serge Hallyn <serue@us.ibm.com>, <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>, "Eric W. Biederman" <ebiederm@xmission.com>
Message-ID: <1273019809-16472-6-git-send-email-ebiederm@xmission.com>


From: Eric W. Biederman <ebiederm@xmission.com>

This reverts commit aaf8cdc34ddba08122f02217d9d684e2f9f5d575.

Drivers like the ipw2100 call device_create_group when they
are initialized and device_remove_group when they are shutdown.
Moving them between namespaces deletes their sysfs groups early.

In particular the following call chain results.
netdev_unregister_kobject -> device_del -> kobject_del -> sysfs_remove_dir
With sysfs_remove_dir recursively deleting all of its subdirectories,
and nothing adding them back.

Ouch!

Therefore we need to call something that ultimately calls sysfs_mv_dir
as that sysfs function can move sysfs directories between namespaces
without deleting their subdirectories or their contents.   Allowing
us to avoid placing extra boiler plate into every driver that does
something interesting with sysfs.

Currently the function that provides that capability is device_rename.
That is, the code works without nasty side effects as originally written.

So remove the misguided fix for moving devices between namespaces.  The
bug in the kobject layer that inspired it has now been recognized and
fixed.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 net/core/dev.c       |   28 +++++-----------------------
 net/core/net-sysfs.c |   16 +---------------
 net/core/net-sysfs.h |    1 -
 3 files changed, 6 insertions(+), 39 deletions(-)

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -984,15 +984,10 @@ int dev_change_name(struct net_device *d
 		return err;
 
 rollback:
-	/* For now only devices in the initial network namespace
-	 * are in sysfs.
-	 */
-	if (net_eq(net, &init_net)) {
-		ret = device_rename(&dev->dev, dev->name);
-		if (ret) {
-			memcpy(dev->name, oldname, IFNAMSIZ);
-			return ret;
-		}
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
@@ -5112,8 +5107,6 @@ int register_netdevice(struct net_device
 	if (dev->features & NETIF_F_SG)
 		dev->features |= NETIF_F_GSO;
 
-	netdev_initialize_kobject(dev);
-
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -5634,15 +5627,6 @@ int dev_change_net_namespace(struct net_
 	if (dev->features & NETIF_F_NETNS_LOCAL)
 		goto out;
 
-#ifdef CONFIG_SYSFS
-	/* Don't allow real devices to be moved when sysfs
-	 * is enabled.
-	 */
-	err = -EINVAL;
-	if (dev->dev.parent)
-		goto out;
-#endif
-
 	/* Ensure the device has been registrered */
 	err = -EINVAL;
 	if (dev->reg_state != NETREG_REGISTERED)
@@ -5693,8 +5677,6 @@ int dev_change_net_namespace(struct net_
 	dev_unicast_flush(dev);
 	dev_addr_discard(dev);
 
-	netdev_unregister_kobject(dev);
-
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
@@ -5707,7 +5689,7 @@ int dev_change_net_namespace(struct net_
 	}
 
 	/* Fixup kobjects */
-	err = netdev_register_kobject(dev);
+	err = device_rename(&dev->dev, dev->name);
 	WARN_ON(err);
 
 	/* Add the device back in the hashes */
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -508,9 +508,6 @@ static int netdev_uevent(struct device *
 	struct net_device *dev = to_net_dev(d);
 	int retval;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return 0;
-
 	/* pass interface to uevent. */
 	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
 	if (retval)
@@ -569,9 +566,6 @@ void netdev_unregister_kobject(struct ne
 
 	kobject_get(&dev->kobj);
 
-	if (!net_eq(dev_net(net), &init_net))
-		return;
-
 	device_del(dev);
 }
 
@@ -581,6 +575,7 @@ int netdev_register_kobject(struct net_d
 	struct device *dev = &(net->dev);
 	const struct attribute_group **groups = net->sysfs_groups;
 
+	device_initialize(dev);
 	dev->class = &net_class;
 	dev->platform_data = net;
 	dev->groups = groups;
@@ -603,9 +598,6 @@ int netdev_register_kobject(struct net_d
 #endif
 #endif /* CONFIG_SYSFS */
 
-	if (!net_eq(dev_net(net), &init_net))
-		return 0;
-
 	return device_add(dev);
 }
 
@@ -622,12 +614,6 @@ void netdev_class_remove_file(struct cla
 EXPORT_SYMBOL(netdev_class_create_file);
 EXPORT_SYMBOL(netdev_class_remove_file);
 
-void netdev_initialize_kobject(struct net_device *net)
-{
-	struct device *device = &(net->dev);
-	device_initialize(device);
-}
-
 int netdev_kobject_init(void)
 {
 	kobj_ns_type_register(&net_ns_type_operations);
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,4 @@
 int netdev_kobject_init(void);
 int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
-void netdev_initialize_kobject(struct net_device *);
 #endif


^ permalink raw reply

* patch netns-teach-network-device-kobjects-which-namespace-they-are-in.patch added to gregkh-2.6 tree
From: gregkh @ 2010-05-20 18:10 UTC (permalink / raw)
  To: ebiederm, bcrl, cornelia.huck, davem, eric.dumazet, gregkh,
	kay.sievers, netdev, serue
In-Reply-To: <1273019809-16472-2-git-send-email-ebiederm@xmission.com>


This is a note to let you know that I've just added the patch titled

    Subject: [PATCH 2/6] netns: Teach network device kobjects which namespace they are in.

to my gregkh-2.6 tree.  Its filename is

    netns-teach-network-device-kobjects-which-namespace-they-are-in.patch

This tree can be found at 
    http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/


>From ebiederm@xmission.com  Thu May 20 10:41:04 2010
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue,  4 May 2010 17:36:45 -0700
Subject: [PATCH 2/6] netns: Teach network device kobjects which namespace they are in.
To: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>, linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>, Cornelia Huck <cornelia.huck@de.ibm.com>, Eric Dumazet <eric.dumazet@gmail.com>, Benjamin LaHaise <bcrl@lhnet.ca>, Serge Hallyn <serue@us.ibm.com>, <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>, "Eric W. Biederman" <ebiederm@xmission.com>
Message-ID: <1273019809-16472-2-git-send-email-ebiederm@xmission.com>


From: Eric W. Biederman <ebiederm@xmission.com>

The problem.  Network devices show up in sysfs and with the network
namespace active multiple devices with the same name can show up in
the same directory, ouch!

To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.

By using the network namespace pointers as tags to separate out the
sysfs directory entries we ensure that we don't have conflicts
in the directories and applications only see a limited set of
the network devices.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 include/linux/kobject.h |    1 +
 net/Kconfig             |    8 ++++++++
 net/core/net-sysfs.c    |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -142,6 +142,7 @@ extern const struct sysfs_ops kobj_sysfs
  */
 enum kobj_ns_type {
 	KOBJ_NS_TYPE_NONE = 0,
+	KOBJ_NS_TYPE_NET,
 	KOBJ_NS_TYPES
 };
 
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,6 +45,14 @@ config COMPAT_NETLINK_MESSAGES
 
 menu "Networking options"
 
+config NET_NS
+	bool "Network namespace support"
+	default n
+	depends on EXPERIMENTAL && NAMESPACES
+	help
+	  Allow user space to create what appear to be multiple instances
+	  of the network stack.
+
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -14,7 +14,9 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/slab.h>
+#include <linux/nsproxy.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
 #include <net/wext.h>
@@ -467,6 +469,37 @@ static struct attribute_group wireless_g
 };
 #endif
 
+static const void *net_current_ns(void)
+{
+	return current->nsproxy->net_ns;
+}
+
+static const void *net_initial_ns(void)
+{
+	return &init_net;
+}
+
+static const void *net_netlink_ns(struct sock *sk)
+{
+	return sock_net(sk);
+}
+
+static struct kobj_ns_type_operations net_ns_type_operations = {
+	.type = KOBJ_NS_TYPE_NET,
+	.current_ns = net_current_ns,
+	.netlink_ns = net_netlink_ns,
+	.initial_ns = net_initial_ns,
+};
+
+static void net_kobj_ns_exit(struct net *net)
+{
+	kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+	.exit = net_kobj_ns_exit,
+};
+
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
@@ -507,6 +540,13 @@ static void netdev_release(struct device
 	kfree((char *)dev - dev->padded);
 }
 
+static const void *net_namespace(struct device *d)
+{
+	struct net_device *dev;
+	dev = container_of(d, struct net_device, dev);
+	return dev_net(dev);
+}
+
 static struct class net_class = {
 	.name = "net",
 	.dev_release = netdev_release,
@@ -516,6 +556,8 @@ static struct class net_class = {
 #ifdef CONFIG_HOTPLUG
 	.dev_uevent = netdev_uevent,
 #endif
+	.ns_type = &net_ns_type_operations,
+	.namespace = net_namespace,
 };
 
 /* Delete sysfs entries but hold kobject reference until after all
@@ -588,5 +630,9 @@ void netdev_initialize_kobject(struct ne
 
 int netdev_kobject_init(void)
 {
+	kobj_ns_type_register(&net_ns_type_operations);
+#ifdef CONFIG_SYSFS
+	register_pernet_subsys(&sysfs_net_ops);
+#endif
 	return class_register(&net_class);
 }


^ permalink raw reply

* patch netlink-implment-netlink_broadcast_filtered.patch added to gregkh-2.6 tree
From: gregkh @ 2010-05-20 18:10 UTC (permalink / raw)
  To: ebiederm, bcrl, cornelia.huck, davem, eric.dumazet, gregkh,
	kay.sievers, netdev, serue
In-Reply-To: <1273019809-16472-3-git-send-email-ebiederm@xmission.com>


This is a note to let you know that I've just added the patch titled

    Subject: netlink: Implment netlink_broadcast_filtered

to my gregkh-2.6 tree.  Its filename is

    netlink-implment-netlink_broadcast_filtered.patch

This tree can be found at 
    http://www.kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/patches/


>From ebiederm@xmission.com  Thu May 20 10:43:10 2010
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue,  4 May 2010 17:36:46 -0700
Subject: netlink: Implment netlink_broadcast_filtered
To: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>, linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>, Cornelia Huck <cornelia.huck@de.ibm.com>, Eric Dumazet <eric.dumazet@gmail.com>, Benjamin LaHaise <bcrl@lhnet.ca>, Serge Hallyn <serue@us.ibm.com>, <netdev@vger.kernel.org>, David Miller <davem@davemloft.net>, "Eric W. Biederman" <ebiederm@xmission.com>
Message-ID: <1273019809-16472-3-git-send-email-ebiederm@xmission.com>


From: Eric W. Biederman <ebiederm@xmission.com>

When netlink sockets are used to convey data that is in a namespace
we need a way to select a subset of the listening sockets to deliver
the packet to.  For the network namespace we have been doing this
by only transmitting packets in the correct network namespace.

For data belonging to other namespaces netlink_broadcast_filtered
provides a mechanism that allows us to examine the destination
socket and to decide if we should transmit the specified packet
to it.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 include/linux/netlink.h  |    4 ++++
 net/netlink/af_netlink.c |   21 +++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -188,6 +188,10 @@ extern int netlink_has_listeners(struct
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
 			     __u32 group, gfp_t allocation);
+extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+	__u32 pid, __u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data);
 extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
 extern int netlink_register_notifier(struct notifier_block *nb);
 extern int netlink_unregister_notifier(struct notifier_block *nb);
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -978,6 +978,8 @@ struct netlink_broadcast_data {
 	int delivered;
 	gfp_t allocation;
 	struct sk_buff *skb, *skb2;
+	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+	void *tx_data;
 };
 
 static inline int do_one_broadcast(struct sock *sk,
@@ -1020,6 +1022,9 @@ static inline int do_one_broadcast(struc
 		p->failure = 1;
 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
 			p->delivery_failure = 1;
+	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+		kfree_skb(p->skb2);
+		p->skb2 = NULL;
 	} else if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
@@ -1038,8 +1043,10 @@ out:
 	return 0;
 }
 
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-		      u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+	u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data)
 {
 	struct net *net = sock_net(ssk);
 	struct netlink_broadcast_data info;
@@ -1059,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk,
 	info.allocation = allocation;
 	info.skb = skb;
 	info.skb2 = NULL;
+	info.tx_filter = filter;
+	info.tx_data = filter_data;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -1083,6 +1092,14 @@ int netlink_broadcast(struct sock *ssk,
 	}
 	return -ESRCH;
 }
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation)
+{
+	return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+		NULL, NULL);
+}
 EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {


^ permalink raw reply

* Re: [PATCH net-next-2.6] bonding: move slave MTU handling from sysfs V2
From: Jay Vosburgh @ 2010-05-20 18:21 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, bonding-devel, monis
In-Reply-To: <20100520053403.GA2867@psychotron.redhat.com>

Jiri Pirko <jpirko@redhat.com> wrote:

>Thu, May 20, 2010 at 02:07:41AM CEST, fubar@us.ibm.com wrote:
[...]
>>	This chunk doesn't apply to net-next-2.6 because your context
>>doesn't match; it looks like you've removed the variable "found" in your
>>"before" source.  On closer inspection, "found" isn't actually used
>>meaningfully, so I'm guessing you removed it in a prior patch but didn't
>>submit that patch.
>>
>>	If that's the case, could you repost the whole series, with
>>sequence numbers?
>
>I don't think that's necessary for now. The patch removing found was posted as a
>first one:
>http://patchwork.ozlabs.org/patch/52795/
>
>I tried it several times. Patches are cleanly applicable in order I posted it.

	Ok, I tracked down a copy (not sure where mine went).  Sequence
numbers do help in general, though, as a set of email messages aren't
always delivered in the same order they're sent.

	In any event, the patches all look ok to me (they do apply
cleanly and compile, now that I have the complete set), but none of them
are bug fixes, and should therefore probably wait until net-next
reopens.  

	So, for whenever the tree is open:

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply

* Re: bnx2x + SFP+ DA/2.6.33.3: Got bad status 0x0 when reading from SFP+ EEPROM -> SFP+ module is not initialized
From: Eilon Greenstein @ 2010-05-20 18:45 UTC (permalink / raw)
  To: Krzysztof Olędzki; +Cc: Michael Chan, netdev@vger.kernel.org
In-Reply-To: <4BF57ADF.1060203@ans.pl>

On Thu, 2010-05-20 at 11:09 -0700, Krzysztof Olędzki wrote:
> On 2010-05-20 19:49, Eilon Greenstein wrote:
> > On Thu, 2010-05-20 at 10:08 -0700, Krzysztof Olędzki wrote:
> >> Hello,
> >>
> >> I would like to connect a dual port SFP+ NetXtreme II BCM57711
> >> 10-Gigabit NIC to a HP J9309A ProCurve 4-Port 10GbE SFP+ zl Module
> >> using a HP ProCurve 10-GbE SFP+ 7m Direct Attach Cable (J9285B).
> >>
> >> Unfortuantely, it does not work. :( After connecting the switch and
> >> the server together and loading the bnx2x module the switch logs:
> >>
> >> I 05/20/10 18:32:23 ports: port E4 is Blocked by STP
> >> I 05/20/10 18:32:23 ports: port E4 is now on-line
> >>
> >> Here is the dmesg output from the server:
> >>
> >> Broadcom NetXtreme II 5771x 10Gigabit Ethernet Driver bnx2x 1.52.1-5 (2009/11/09)
> >> bnx2x 0000:04:00.0: PCI INT A ->  GSI 38 (level, low) ->  IRQ 38
> >> bnx2x 0000:04:00.0: setting latency timer to 64
> >> bnx2x: part number 394D4342-31373735-31314131-473331
> >> bnx2x: Loading bnx2x-e1h-5.2.7.0.fw
> >> bnx2x 0000:04:00.0: firmware: requesting bnx2x-e1h-5.2.7.0.fw
> >> eth4: Broadcom NetXtreme II BCM57711 XGb (A0) PCI-E x8 5GHz (Gen2) found at mem dc000000, IRQ 38, node addr 00:10:18:5f:e4:b4
> >> bnx2x 0000:04:00.1: PCI INT B ->  GSI 45 (level, low) ->  IRQ 45
> >> bnx2x 0000:04:00.1: setting latency timer to 64
> >> bnx2x: part number 394D4342-31373735-31314131-473331
> >> bnx2x: Loading bnx2x-e1h-5.2.7.0.fw
> >> bnx2x 0000:04:00.1: firmware: requesting bnx2x-e1h-5.2.7.0.fw
> >> eth5: Broadcom NetXtreme II BCM57711 XGb (A0) PCI-E x8 5GHz (Gen2) found at mem dd000000, IRQ 45, node addr 00:10:18:5f:e4:b6
> >> bnx2x 0000:04:00.0: irq 97 for MSI/MSI-X
> >> bnx2x 0000:04:00.0: irq 98 for MSI/MSI-X
> >> bnx2x 0000:04:00.0: irq 99 for MSI/MSI-X
> >> bnx2x 0000:04:00.0: irq 100 for MSI/MSI-X
> >> bnx2x 0000:04:00.0: irq 101 for MSI/MSI-X
> >> bnx2x 0000:04:00.0: irq 102 for MSI/MSI-X
> >> bnx2x: eth4: using MSI-X  IRQs: sp 97  fp[0] 99 ... fp[3] 102
> >> ADDRCONF(NETDEV_UP): eth4: link is not ready
> >> bnx2x 0000:04:00.1: irq 103 for MSI/MSI-X
> >> bnx2x 0000:04:00.1: irq 104 for MSI/MSI-X
> >> bnx2x 0000:04:00.1: irq 105 for MSI/MSI-X
> >> bnx2x 0000:04:00.1: irq 106 for MSI/MSI-X
> >> bnx2x 0000:04:00.1: irq 107 for MSI/MSI-X
> >> bnx2x 0000:04:00.1: irq 108 for MSI/MSI-X
> >> bnx2x: eth5: using MSI-X  IRQs: sp 103  fp[0] 105 ... fp[3] 108
> >> ADDRCONF(NETDEV_UP): eth5: link is not ready
> >> bnx2x: eth5 NIC Link is Down
> >> bnx2x: eth5 NIC Link is Down
> >>
> >> Loading the driver with debug mode enabled (modprobe bnx2x debug=0x20004) I got:
> > Thank you for this debug information! You saved one email round trip :)
> 
> Hehe, thanks.
> 
> > However, I still need some more information about the FW version and
> > nvram settings. Can you please send me the output of ethtool -i
> 
> # ethtool -i eth5
> driver: bnx2x
> version: 1.52.1-5
> firmware-version: BC:5.0.13 PHY:0aa0:0406
> bus-info: 0000:04:00.1
> 
> > and ethtool -e? Since ethtool -e is quite long, it is best to send
> > it as an attached file.
> 
> Attached.

Almost everything seems to be in order. Almost - since you don't get
link... I don't think I have tried using this kind of Direct Attach
Cable - so maybe it just needs some more time. Let's see if the
following makes any difference (other than delay the failure for another
2.7 seconds):

diff --git a/drivers/net/bnx2x_link.c b/drivers/net/bnx2x_link.c
index ff70be8..bcee38c 100644
--- a/drivers/net/bnx2x_link.c
+++ b/drivers/net/bnx2x_link.c
@@ -3113,7 +3113,7 @@ static u8
bnx2x_wait_for_sfp_module_initialized(struct link_params *params)
        u16 timeout;
        /* Initialization time after hot-plug may take up to 300ms for
some
        phys type ( e.g. JDSU ) */
-       for (timeout = 0; timeout < 60; timeout++) {
+       for (timeout = 0; timeout < 600; timeout++) {
                if (bnx2x_read_sfp_module_eeprom(params, 1, 1, &val)
                    == 0) {
                        DP(NETIF_MSG_LINK, "SFP+ module initialization "

If it does help, be sure to let me know how much time it took (you
should have this debug print).

Regards,
Eilon



^ permalink raw reply related

* Re: bnx2x + SFP+ DA/2.6.33.3: Got bad status 0x0 when reading from SFP+ EEPROM -> SFP+ module is not initialized
From: Krzysztof Olędzki @ 2010-05-20 19:41 UTC (permalink / raw)
  To: eilong; +Cc: Michael Chan, netdev@vger.kernel.org
In-Reply-To: <1274381113.28702.6.camel@lb-tlvb-eilong.il.broadcom.com>

On 2010-05-20 20:45, Eilon Greenstein wrote:
> On Thu, 2010-05-20 at 11:09 -0700, Krzysztof Olędzki wrote:
>> On 2010-05-20 19:49, Eilon Greenstein wrote:
>>> On Thu, 2010-05-20 at 10:08 -0700, Krzysztof Olędzki wrote:
>>>> Hello,
>>>>
>>>> I would like to connect a dual port SFP+ NetXtreme II BCM57711
>>>> 10-Gigabit NIC to a HP J9309A ProCurve 4-Port 10GbE SFP+ zl Module
>>>> using a HP ProCurve 10-GbE SFP+ 7m Direct Attach Cable (J9285B).
>>>>
>>>> Unfortuantely, it does not work. :( After connecting the switch and
>>>> the server together and loading the bnx2x module the switch logs:
>>>>
>>>> I 05/20/10 18:32:23 ports: port E4 is Blocked by STP
>>>> I 05/20/10 18:32:23 ports: port E4 is now on-line
>>>>
>>>> Here is the dmesg output from the server:
>>>>
>>>> Broadcom NetXtreme II 5771x 10Gigabit Ethernet Driver bnx2x 1.52.1-5 (2009/11/09)
>>>> bnx2x 0000:04:00.0: PCI INT A ->   GSI 38 (level, low) ->   IRQ 38
>>>> bnx2x 0000:04:00.0: setting latency timer to 64
>>>> bnx2x: part number 394D4342-31373735-31314131-473331
>>>> bnx2x: Loading bnx2x-e1h-5.2.7.0.fw
>>>> bnx2x 0000:04:00.0: firmware: requesting bnx2x-e1h-5.2.7.0.fw
>>>> eth4: Broadcom NetXtreme II BCM57711 XGb (A0) PCI-E x8 5GHz (Gen2) found at mem dc000000, IRQ 38, node addr 00:10:18:5f:e4:b4
>>>> bnx2x 0000:04:00.1: PCI INT B ->   GSI 45 (level, low) ->   IRQ 45
>>>> bnx2x 0000:04:00.1: setting latency timer to 64
>>>> bnx2x: part number 394D4342-31373735-31314131-473331
>>>> bnx2x: Loading bnx2x-e1h-5.2.7.0.fw
>>>> bnx2x 0000:04:00.1: firmware: requesting bnx2x-e1h-5.2.7.0.fw
>>>> eth5: Broadcom NetXtreme II BCM57711 XGb (A0) PCI-E x8 5GHz (Gen2) found at mem dd000000, IRQ 45, node addr 00:10:18:5f:e4:b6
>>>> bnx2x 0000:04:00.0: irq 97 for MSI/MSI-X
>>>> bnx2x 0000:04:00.0: irq 98 for MSI/MSI-X
>>>> bnx2x 0000:04:00.0: irq 99 for MSI/MSI-X
>>>> bnx2x 0000:04:00.0: irq 100 for MSI/MSI-X
>>>> bnx2x 0000:04:00.0: irq 101 for MSI/MSI-X
>>>> bnx2x 0000:04:00.0: irq 102 for MSI/MSI-X
>>>> bnx2x: eth4: using MSI-X  IRQs: sp 97  fp[0] 99 ... fp[3] 102
>>>> ADDRCONF(NETDEV_UP): eth4: link is not ready
>>>> bnx2x 0000:04:00.1: irq 103 for MSI/MSI-X
>>>> bnx2x 0000:04:00.1: irq 104 for MSI/MSI-X
>>>> bnx2x 0000:04:00.1: irq 105 for MSI/MSI-X
>>>> bnx2x 0000:04:00.1: irq 106 for MSI/MSI-X
>>>> bnx2x 0000:04:00.1: irq 107 for MSI/MSI-X
>>>> bnx2x 0000:04:00.1: irq 108 for MSI/MSI-X
>>>> bnx2x: eth5: using MSI-X  IRQs: sp 103  fp[0] 105 ... fp[3] 108
>>>> ADDRCONF(NETDEV_UP): eth5: link is not ready
>>>> bnx2x: eth5 NIC Link is Down
>>>> bnx2x: eth5 NIC Link is Down
>>>>
>>>> Loading the driver with debug mode enabled (modprobe bnx2x debug=0x20004) I got:
>>> Thank you for this debug information! You saved one email round trip :)
>>
>> Hehe, thanks.
>>
>>> However, I still need some more information about the FW version and
>>> nvram settings. Can you please send me the output of ethtool -i
>>
>> # ethtool -i eth5
>> driver: bnx2x
>> version: 1.52.1-5
>> firmware-version: BC:5.0.13 PHY:0aa0:0406
>> bus-info: 0000:04:00.1
>>
>>> and ethtool -e? Since ethtool -e is quite long, it is best to send
>>> it as an attached file.
>>
>> Attached.
>
> Almost everything seems to be in order. Almost - since you don't get
> link... I don't think I have tried using this kind of Direct Attach
> Cable - so maybe it just needs some more time. Let's see if the
> following makes any difference (other than delay the failure for another
> 2.7 seconds):
>
> diff --git a/drivers/net/bnx2x_link.c b/drivers/net/bnx2x_link.c
> index ff70be8..bcee38c 100644
> --- a/drivers/net/bnx2x_link.c
> +++ b/drivers/net/bnx2x_link.c
> @@ -3113,7 +3113,7 @@ static u8
> bnx2x_wait_for_sfp_module_initialized(struct link_params *params)
>          u16 timeout;
>          /* Initialization time after hot-plug may take up to 300ms for
> some
>          phys type ( e.g. JDSU ) */
> -       for (timeout = 0; timeout<  60; timeout++) {
> +       for (timeout = 0; timeout<  600; timeout++) {
>                  if (bnx2x_read_sfp_module_eeprom(params, 1, 1,&val)
>                      == 0) {
>                          DP(NETIF_MSG_LINK, "SFP+ module initialization "
>
> If it does help, be sure to let me know how much time it took (you
> should have this debug print).

Still no luck. :( The kernel printed many more "Got bad status 0x0 when 
reading from SFP+ EEPROM" messages. Finally I got:
  "SFP+ module is not initialized".

Best regards,

			Krzysztof Olędzki

^ permalink raw reply

* Re: bnx2x + SFP+ DA/2.6.33.3: Got bad status 0x0 when reading from SFP+ EEPROM -> SFP+ module is not initialized
From: Rick Jones @ 2010-05-20 20:25 UTC (permalink / raw)
  To: Krzysztof Olędzki; +Cc: eilong, Michael Chan, netdev@vger.kernel.org
In-Reply-To: <4BF59058.6050205@ans.pl>

Some simple/simplistic thoughts/questions...

Has the DAC been used successfully prior to this?

Do you have another HP ProCurve 10-GbE SFP+ 7m Direct Attach Cable (J9285B) to try?

There's a transceiver and presumably an EEPROM at both ends of a DAC right?  If 
the EEPROM at one end were "bad" might the 57711 be happier with the other end 
of the DAC?  Getting some sort of error message at the switch side, which may 
(or may not) have more detailed diagnostics might help.

rick jones

^ permalink raw reply

* [RFC] tcp: delack_timer expiration changes for every frame
From: Eric Dumazet @ 2010-05-20 20:47 UTC (permalink / raw)
  To: netdev, David Miller; +Cc: Ilpo Järvinen

While oprofiling net-next-2.6 during tcp workloads I found
mod_timer(delack_timer) was used too much, even if we receive/send more
than one frame per jiffy.

Something seems wrong, since we should try to change this timer only
when jiffies changes. mod_timer() has a special optimization for this,
but something is broken in our tcp stack ?

I added some logs in mod_timer() :

HZ = 250

results for one socket shown :

[  392.116735] timer->expires=22997, expires=23024(37) diff=-27 timer=e5ecb754
[  392.120627] timer->expires=23024, expires=22998(10) diff=26 timer=e5ecb754
[  392.123245] timer->expires=22998, expires=23025(37) diff=-27 timer=e5ecb754
[  392.133688] timer->expires=23025, expires=23001(10) diff=24 timer=e5ecb754
[  392.136502] timer->expires=23001, expires=23029(37) diff=-28 timer=e5ecb754
[  392.140392] timer->expires=23029, expires=23003(10) diff=26 timer=e5ecb754
[  392.143142] timer->expires=23003, expires=23030(37) diff=-27 timer=e5ecb754
[  392.153812] timer->expires=23030, expires=23006(10) diff=24 timer=e5ecb754
[  392.156658] timer->expires=23006, expires=23034(37) diff=-28 timer=e5ecb754
[  392.160474] timer->expires=23034, expires=23008(10) diff=26 timer=e5ecb754
[  392.163317] timer->expires=23008, expires=23035(37) diff=-27 timer=e5ecb754
[  392.167176] timer->expires=23035, expires=23009(10) diff=26 timer=e5ecb754
[  392.176963] timer->expires=23009, expires=23039(37) diff=-30 timer=e5ecb754
[  392.180863] timer->expires=23039, expires=23013(10) diff=26 timer=e5ecb754
[  392.183577] timer->expires=23013, expires=23040(37) diff=-27 timer=e5ecb754
[  392.187537] timer->expires=23040, expires=23014(10) diff=26 timer=e5ecb754
[  392.197286] timer->expires=23014, expires=23044(37) diff=-30 timer=e5ecb754
[  392.201047] timer->expires=23044, expires=23018(10) diff=26 timer=e5ecb754
[  392.203761] timer->expires=23018, expires=23045(37) diff=-27 timer=e5ecb754
[  392.207721] timer->expires=23045, expires=23019(10) diff=26 timer=e5ecb754
[  392.217454] timer->expires=23019, expires=23049(37) diff=-30 timer=e5ecb754

So we change the delack_timer by a positive delta (~ HZ/10) and a
 negative delta (~HZ/10), on the typical netperf TCP_RR workload.



Here, the incoming frame is handled by netperf, doing a recvmsg().
tcp_send_delayed_ack() sets the delack_timer to jiffies + HZ/25

[  392.207721] timer->expires=23045, new expires=23019(10) diff=26 timer=e5ecb754
[  392.207785] ------------[ cut here ]------------
[  392.207846] WARNING: at kernel/timer.c:753 mod_timer+0x55/0x18e()
[  392.207908] Hardware name: ProLiant BL460c G6
[  392.207965] Modules linked in: ipmi_devintf ipmi_si ipmi_msghandler ipv6 dm_mod button battery ac ehci_hcd uhci_hcd tg3 libphy bnx2x crc32c libcrc32c mdio [last unloaded: x_tables]
[  392.208900] Pid: 5320, comm: netperf Tainted: G        W  2.6.34-06175-g801cae3-dirty #33
[  392.208979] Call Trace:
[  392.209036]  [<c102df55>] ? warn_slowpath_common+0x5d/0x70
[  392.209098]  [<c102df73>] ? warn_slowpath_null+0xb/0xd
[  392.209159]  [<c10388de>] ? mod_timer+0x55/0x18e
[  392.209221]  [<c1279ce7>] ? tcp_send_delayed_ack+0xb5/0xc1
[  392.209282]  [<c1276d26>] ? tcp_rcv_established+0x39f/0x4f7
[  392.209345]  [<c127bae5>] ? tcp_v4_do_rcv+0x22/0x161
[  392.209406]  [<c126d6b4>] ? tcp_prequeue_process+0x47/0x5b
[  392.209468]  [<c12701d2>] ? tcp_recvmsg+0x371/0x691
[  392.209529]  [<c12b8c91>] ? _raw_spin_lock_bh+0x8/0x1e
[  392.209590]  [<c1240bcd>] ? release_sock+0x10/0xc9
[  392.216514]  [<c1285bad>] ? inet_recvmsg+0x5d/0x72
[  392.216575]  [<c123e725>] ? sock_recvmsg+0xb4/0xd1
[  392.216636]  [<c1032ace>] ? irq_exit+0x39/0x5b
[  392.216696]  [<c123fab5>] ? sys_recvfrom+0xb4/0x117
[  392.216757]  [<c10483be>] ? ktime_get+0x61/0xe8
[  392.216817]  [<c1016431>] ? lapic_next_event+0x13/0x16
[  392.216878]  [<c104ba9d>] ? clockevents_program_event+0xac/0xbc
[  392.216940]  [<c104c6cc>] ? tick_dev_program_event+0x34/0x138
[  392.217002]  [<c104c7ed>] ? tick_program_event+0x1d/0x21
[  392.217064]  [<c1044da0>] ? hrtimer_interrupt+0x10b/0x1c1
[  392.217126]  [<c123fb31>] ? sys_recv+0x19/0x1d
[  392.217186]  [<c12401dc>] ? sys_socketcall+0x120/0x1c6
[  392.217303]  [<c100268c>] ? sysenter_do_call+0x12/0x22
[  392.217364] ---[ end trace e9475c06f1d49408 ]---

Here, the incoming frame is handled by the other side (netserver),
but still for the netperf socket, (softirq handling)
tcp_v4_rcv() sets the delack timer to 37 ticks, so the mod_timer() optimization is not
working at all.

[  392.217454] timer->expires=23019, new expires=23049(37) diff=-30 timer=e5ecb754
[  392.217518] ------------[ cut here ]------------
[  392.217578] WARNING: at kernel/timer.c:753 mod_timer+0x55/0x18e()
[  392.217639] Hardware name: ProLiant BL460c G6
[  392.217697] Modules linked in: ipmi_devintf ipmi_si ipmi_msghandler ipv6 dm_mod button battery ac ehci_hcd uhci_hcd tg3 libphy bnx2x crc32c libcrc32c mdio [last unloaded: x_tables]
[  392.218439] Pid: 5321, comm: netserver Tainted: G        W  2.6.34-06175-g801cae3-dirty #33
[  392.218526] Call Trace:
[  392.218582]  [<c102df55>] ? warn_slowpath_common+0x5d/0x70
[  392.218644]  [<c102df73>] ? warn_slowpath_null+0xb/0xd
[  392.218705]  [<c10388de>] ? mod_timer+0x55/0x18e
[  392.218765]  [<c127cf56>] ? tcp_v4_rcv+0x41c/0x6b7
[  392.218826]  [<c1265832>] ? ip_local_deliver_finish+0xe9/0x178
[  392.218888]  [<c126572a>] ? ip_rcv_finish+0x262/0x281
[  392.218949]  [<c1249986>] ? __netif_receive_skb+0x267/0x282
[  392.219011]  [<c1249a0d>] ? process_backlog+0x6c/0x113
[  392.219072]  [<c124a2e6>] ? net_rx_action+0x8a/0x15a
[  392.219133]  [<c106234e>] ? __rcu_process_callbacks+0xb9/0x1d1
[  392.219195]  [<c1032910>] ? __do_softirq+0x0/0x13a
[  392.219255]  [<c10329b5>] ? __do_softirq+0xa5/0x13a
[  392.219316]  [<c1032910>] ? __do_softirq+0x0/0x13a
[  392.219375]  <IRQ>  [<c1032449>] ? local_bh_enable+0x5f/0x6a
[  392.219474]  [<c124c0be>] ? dev_queue_xmit+0x34d/0x37a
[  392.219536]  [<c1268115>] ? ip_finish_output+0x1c7/0x1ff
[  392.219610]  [<c1268255>] ? ip_local_out+0x18/0x1a
[  392.219670]  [<c12684f4>] ? ip_queue_xmit+0x29d/0x2d5
[  392.219731]  [<c1293e89>] ? bictcp_acked+0x4f/0x139
[  392.219791]  [<c1275ee8>] ? tcp_ack+0x155b/0x16e9
[  392.219851]  [<c127851a>] ? tcp_transmit_skb+0x62a/0x65f
[  392.219912]  [<c1038a03>] ? mod_timer+0x17a/0x18e
[  392.219972]  [<c127900a>] ? tcp_write_xmit+0x73a/0x81c
[  392.220033]  [<c109e487>] ? __kmalloc_node+0x30/0x76
[  392.220095]  [<c1279127>] ? __tcp_push_pending_frames+0x15/0x6c
[  392.220159]  [<c126f214>] ? tcp_sendmsg+0x7ee/0x8c5
[  392.220223]  [<c123e823>] ? sock_sendmsg+0xa7/0xc1
[  392.220284]  [<c1044da0>] ? hrtimer_interrupt+0x10b/0x1c1
[  392.220346]  [<c1032ace>] ? irq_exit+0x39/0x5b
[  392.220406]  [<c1016811>] ? smp_apic_timer_interrupt+0x6b/0x75
[  392.220468]  [<c102007b>] ? pud_huge+0x1/0x9
[  392.220536]  [<c123f9b9>] ? sys_sendto+0xfc/0x127
[  392.220599]  [<c1044bd4>] ? hrtimer_start_range_ns+0xf/0x13
[  392.220661]  [<c1023848>] ? update_curr+0x60/0xdf
[  392.220722]  [<c1044871>] ? hrtimer_forward+0x10f/0x123
[  392.220784]  [<c10483be>] ? ktime_get+0x61/0xe8
[  392.220844]  [<c123f9fd>] ? sys_send+0x19/0x1d
[  392.220903]  [<c12401af>] ? sys_socketcall+0xf3/0x1c6
[  392.220964]  [<c1032ace>] ? irq_exit+0x39/0x5b
[  392.221024]  [<c1016811>] ? smp_apic_timer_interrupt+0x6b/0x75
[  392.221086]  [<c100268c>] ? sysenter_do_call+0x12/0x22
[  392.221147] ---[ end trace e9475c06f1d49409 ]---

It's a bit late here to investigate; maybe one of you has an idea about this...




^ permalink raw reply

* Re: bnx2x + SFP+ DA/2.6.33.3: Got bad status 0x0 when reading from SFP+ EEPROM -> SFP+ module is not initialized
From: Krzysztof Olędzki @ 2010-05-20 20:54 UTC (permalink / raw)
  To: Rick Jones; +Cc: eilong, Michael Chan, netdev@vger.kernel.org
In-Reply-To: <4BF59ABB.9070600@hp.com>

On 2010-05-20 22:25, Rick Jones wrote:
> Some simple/simplistic thoughts/questions...
>
> Has the DAC been used successfully prior to this?

Yes. It was successfully used to connect two HP switches, before I 
received SFP+ SR modules, that allowed me to put the switches into 
distant rooms.

> Do you have another HP ProCurve 10-GbE SFP+ 7m Direct Attach Cable (J9285B) to try?

Yes. The same situation.

> There's a transceiver and presumably an EEPROM at both ends of a DAC right?

Yes, I think there should be one. ;)

> If
> the EEPROM at one end were "bad" might the 57711 be happier with the other end
> of the DAC?

Tested both ends. The same situation. :|

> Getting some sort of error message at the switch side, which may
> (or may not) have more detailed diagnostics might help.

There is no error message at the switch side. The switch shows that 
everything is correct.

Best regards,

				Krzysztof Olędzki

^ permalink raw reply

* [PATCH] ipvs: Add missing locking during connection table hashing and unhashing
From: Sven Wegener @ 2010-05-20 20:55 UTC (permalink / raw)
  To: Simon Horman, Julian Anastasov, Wensong Zhang; +Cc: netdev, lvs-devel

The code that hashes and unhashes connections from the connection table
is missing locking of the connection being modified, which opens up a
race condition and results in memory corruption when this race condition
is hit.

Here is what happens in pretty verbose form:

CPU 0					CPU 1
------------				------------
An active connection is terminated and
we schedule ip_vs_conn_expire() on this
CPU to expire this connection.

					IRQ assignment is changed to this CPU,
					but the expire timer stays scheduled on
					the other CPU.

					New connection from same ip:port comes
					in right before the timer expires, we
					find the inactive connection in our
					connection table and get a reference to
					it. We properly lock the connection in
					tcp_state_transition() and read the
					connection flags in set_tcp_state().

ip_vs_conn_expire() gets called, we
unhash the connection from our
connection table and remove the hashed
flag in ip_vs_conn_unhash(), without
proper locking!

					While still holding proper locks we
					write the connection flags in
					set_tcp_state() and this sets the hashed
					flag again.

ip_vs_conn_expire() fails to expire the
connection, because the other CPU has
incremented the reference count. We try
to re-insert the connection into our
connection table, but this fails in
ip_vs_conn_hash(), because the hashed
flag has been set by the other CPU. We
re-schedule execution of
ip_vs_conn_expire(). Now this connection
has the hashed flag set, but isn't
actually hashed in our connection table
and has a dangling list_head.

					We drop the reference we held on the
					connection and schedule the expire timer
					for timeouting the connection on this
					CPU. Further packets won't be able to
					find this connection in our connection
					table.

					ip_vs_conn_expire() gets called again,
					we think it's already hashed, but the
					list_head is dangling and while removing
					the connection from our connection table
					we write to the memory location where
					this list_head points to.

The result will probably be a kernel oops at some other point in time.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Cc: stable@kernel.org
---
 net/netfilter/ipvs/ip_vs_conn.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

This race condition is pretty subtle, but it can be triggered remotely.
It needs the IRQ assignment change or another circumstance where packets
coming from the same ip:port for the same service are being processed on
different CPUs. And it involves hitting the exact time at which
ip_vs_conn_expire() gets called. It can be avoided by making sure that
all packets from one connection are always processed on the same CPU and
can be made harder to exploit by changing the connection timeouts to
some custom values.

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8e..ff04e9e 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -162,6 +162,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);

 	ct_write_lock(hash);
+	spin_lock(&cp->lock);

 	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
 		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
@@ -174,6 +175,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 		ret = 0;
 	}

+	spin_unlock(&cp->lock);
 	ct_write_unlock(hash);

 	return ret;
@@ -193,6 +195,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);

 	ct_write_lock(hash);
+	spin_lock(&cp->lock);

 	if (cp->flags & IP_VS_CONN_F_HASHED) {
 		list_del(&cp->c_list);
@@ -202,6 +205,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	} else
 		ret = 0;

+	spin_unlock(&cp->lock);
 	ct_write_unlock(hash);

 	return ret;

^ permalink raw reply related

* Re: [PATCH net-next-2.6 0/8] CAIF: Bugfixes and updates
From: Sjur Brændeland @ 2010-05-20 21:08 UTC (permalink / raw)
  To: David Miller
  Cc: sjur.brandeland, netdev, marcel, daniel.martensson, linus.walleji
In-Reply-To: <20100520.005658.11949785.davem@davemloft.net>

Hi Dave,

David Miller wrote:
> Send me bug fixes only.

Currently in caif_socket.c caif_seqpkt_recvmsg returns -EMSGSIZE if
the skb doesn't fit in the user buffer.
Would you consider my patch where I fix MSG_TRUNC to work properly a bugfix?

Regards Sjur

^ permalink raw reply

* Re: [PATCH] sh_eth: Fix memleak in sh_mdio_release
From: Nobuhiro Iwamatsu @ 2010-05-20 22:12 UTC (permalink / raw)
  To: Denis Kirjanov; +Cc: davem, shimoda.yoshihiro, morimoto.kuninori, netdev
In-Reply-To: <20100520140059.GA8968@hera.kernel.org>

Hi, Denis.

2010/5/20 Denis Kirjanov <dkirjanov@hera.kernel.org>:
> Allocated memory for IRQs should be freed when releasing the mii_bus
>
> Signed-off-by: Denis Kirjanov <dkirjanov@kernel.org>
> ---
>
> drivers/net/sh_eth.c |    3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
> index 586ed09..501a55f 100644
> --- a/drivers/net/sh_eth.c
> +++ b/drivers/net/sh_eth.c
> @@ -1294,6 +1294,9 @@ static int sh_mdio_release(struct net_device *ndev)
>        /* remove mdio bus info from net_device */
>        dev_set_drvdata(&ndev->dev, NULL);
>
> +       /* free interrupts memory */
> +       kfree(bus->irq);
> +
>        /* free bitbang info */
>        free_mdio_bitbang(bus);
>
>
Acked-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>

Thanks!

Best regards,
  Nobuhiro

^ permalink raw reply

* Re: [PATCH 1/3] cgroups: Add an API to attach a task to current task's cgroup
From: Paul Menage @ 2010-05-20 22:22 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: Michael S. Tsirkin, netdev, kvm@vger.kernel.org, lkml
In-Reply-To: <1274227488.2370.107.camel@w-sridhar.beaverton.ibm.com>

On Tue, May 18, 2010 at 5:04 PM, Sridhar Samudrala
<samudrala.sridhar@gmail.com> wrote:
> Add a new kernel API to attach a task to current task's cgroup
> in all the active hierarchies.
>
> Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>

Reviewed-by: Paul Menage <menage@google.com>

It would be more efficient to just attach directly to current->cgroups
rather than potentially creating/destroying one css_set for each
hierarchy until we've completely converged on current->cgroups - but
that would require a bunch of refactoring of the guts of
cgroup_attach_task() to ensure that the right can_attach()/attach()
callbacks are made. That doesn't really seem worthwhile right now for
the initial use, that I imagine isn't going to be
performance-sensitive.

Paul

^ permalink raw reply

* Re: [PATCH 1/3] cgroups: Add an API to attach a task to current task's cgroup
From: Paul Menage @ 2010-05-20 22:26 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: Michael S. Tsirkin, netdev, kvm@vger.kernel.org, lkml
In-Reply-To: <AANLkTinsrFoLVKDFM5pcKcL_6MvAzhR6IzbNmWKh3BDh@mail.gmail.com>

On Thu, May 20, 2010 at 3:22 PM, Paul Menage <menage@google.com> wrote:
> On Tue, May 18, 2010 at 5:04 PM, Sridhar Samudrala
> <samudrala.sridhar@gmail.com> wrote:
>> Add a new kernel API to attach a task to current task's cgroup
>> in all the active hierarchies.
>>
>> Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
>
> Reviewed-by: Paul Menage <menage@google.com>
>

One other thought on this - this would be the first piece of code
that's attaching a task to a cgroup without holding the cgroup
directory inode i_mutex. I believe that this is probably OK.

Paul

^ permalink raw reply

* Re: [PATCH net-next-2.6 0/8] CAIF: Bugfixes and updates
From: David Miller @ 2010-05-20 22:27 UTC (permalink / raw)
  To: sjurbren
  Cc: sjur.brandeland, netdev, marcel, daniel.martensson, linus.walleji
In-Reply-To: <AANLkTilvQ8W5X-qvt6GvEj-1ZmmfwZJz079Rdi8K6Tll@mail.gmail.com>

From: Sjur Brændeland <sjurbren@gmail.com>
Date: Thu, 20 May 2010 23:08:27 +0200

> Currently in caif_socket.c caif_seqpkt_recvmsg returns -EMSGSIZE if
> skb don't fit in user buffer.
> Would you consider my patch where I fix MSG_TRUNC to work properly a bugfix?

You're really pushing it, but fine...

This is the part I hate most about the merge window, people just
want to slip in as much as they possibly can and justify it by
any means necessary to suit their own personal needs instead of
being amicable and abiding by the merge window rules which is
for the good of everyone.

^ permalink raw reply

* Re: [patch] IPVS: one-packet scheduling
From: Simon Horman @ 2010-05-20 22:31 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: netdev, lvs-devel, netfilter-devel, Wensong Zhang,
	Julian Anastasov, Nick Chalk
In-Reply-To: <4BF55294.9030908@trash.net>

On Thu, May 20, 2010 at 05:17:40PM +0200, Patrick McHardy wrote:
> Simon Horman wrote:
> > From: Nick Chalk <nick@loadbalancer.org>
> > 
> > IPVS: one-packet scheduling
> > 
> > Allow one-packet scheduling for UDP connections. When the fwmark-based or
> > normal virtual service is marked with '-o' or '--ops' options all
> > connections are created only to schedule one packet. Useful to schedule UDP
> > packets from same client port to different real servers. Recommended with
> > RR or WRR schedulers (the connections are not visible with ipvsadm -L).
> 
> I'm afraid its too late in this merge window for new features
> since Dave has already sent his merge request to Linus.
> 
> Please resend once the net-next (and nf-next) tree opens up.

Sure, will do.


^ permalink raw reply

* Re: tun: Use netif_receive_skb instead of netif_rx
From: Herbert Xu @ 2010-05-20 23:16 UTC (permalink / raw)
  To: Neil Horman; +Cc: Eric Dumazet, David Miller, bmb, tgraf, nhorman, netdev
In-Reply-To: <20100520172918.GA17613@shamino.rdu.redhat.com>

On Thu, May 20, 2010 at 01:29:18PM -0400, Neil Horman wrote:
>
> So, I'm testing this patch out now, and unfotunately it doesn't seem to be
> working.  Every frame seems to be holding a classid of 0.  Trying to figure out
> why now.

Not very surprising since tun.c doesn't go through the normal
socket interface.  I'll send an additional patch for that.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: linux-next: build failure after merge of the suspend tree
From: Stephen Rothwell @ 2010-05-21  0:29 UTC (permalink / raw)
  To: John W. Linville, David Miller, Linus
  Cc: Rafael J. Wysocki, linux-next, linux-kernel, Helmut Schaa, netdev
In-Reply-To: <201005080413.24465.rjw@sisk.pl>

Hi John, Dave,

On Sat, 8 May 2010 04:13:24 +0200 "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
>
> On Friday 07 May 2010, Stephen Rothwell wrote:
> > 
> > After merging the suspend tree, today's linux-next build (x86_64
> > allmodconfig) failed like this:
> > 
> > net/mac80211/scan.c: In function 'ieee80211_scan_state_decision':
> > net/mac80211/scan.c:510: error: implicit declaration of function 'pm_qos_requirement'
> > 
> > Caused by commit 62bad14fc6e0911a99882c261390968977d43283 ("PM QOS
> > update") from the suspend tree interacting with commit
> > df13cce53a7b28a81460e6bfc4857e9df4956141 ("mac80211: Improve software
> > scan timing") from the net tree.
> > 
> > I have added the following merge fixup patch and can carry it as
> > necessary:
> 
> Thanks a lot, please do so if that's not a problem.
> 
> Both trees are based on Linus' current and I don't see a good way of fixing
> this issue in any of them individually.

The suspend tree has been merged into Linus' tree, so this patch is
needed in the net tree before it is merged (or as part of the merge).

Here is the patch again:

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 7 May 2010 13:02:54 +1000
Subject: [PATCH] wireless: update for pm_qos_requirement to pm_qos_request rename

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 net/mac80211/scan.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index e14c441..e1b0be7 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -510,7 +510,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 		bad_latency = time_after(jiffies +
 				ieee80211_scan_get_channel_time(next_chan),
 				local->leave_oper_channel_time +
-				usecs_to_jiffies(pm_qos_requirement(PM_QOS_NETWORK_LATENCY)));
+				usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY)));
 
 		listen_int_exceeded = time_after(jiffies +
 				ieee80211_scan_get_channel_time(next_chan),
-- 
1.7.1

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

^ permalink raw reply related

* Re: tun: Use netif_receive_skb instead of netif_rx
From: Neil Horman @ 2010-05-21  0:39 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Eric Dumazet, David Miller, bmb, tgraf, nhorman, netdev
In-Reply-To: <20100520231630.GA22593@gondor.apana.org.au>

On Fri, May 21, 2010 at 09:16:30AM +1000, Herbert Xu wrote:
> On Thu, May 20, 2010 at 01:29:18PM -0400, Neil Horman wrote:
> >
> > So, I'm testing this patch out now, and unfotunately it doesn't seem to be
> > working.  Every frame seems to be holding a classid of 0.  Trying to figure out
> > why now.
> 
> Not very surprising since tun.c doesn't go through the normal
> socket interface.  I'll send a additional patch for that.
> 
I don't think that's it.  I think it's a chicken and egg situation.  I think the
problem is that tasks can't be assigned to cgroups until they're created, and in
that time a sock can be created.  It's a natural race.  If you create a socket
before you assign it to a cgroup, that socket retains a classid of zero.  I'm
going to try to modify the patch to update sockets owned by tasks when the cgroup
is assigned.

Best
Neil

> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> 

^ permalink raw reply

* Final Notification
From: NG Inter Switch ATM Organization @ 2010-05-21  7:37 UTC (permalink / raw)


$950,000.00 has been accredited in your favor, In view of this, 
you are instructed to contact the Senate House with the details 
stated below and Endeavor to discuss the funds delivery. More 
details will be sent to you once you contact Mrs Linda Hills with
your Full Names: Delivery Address: Sex: Age: Occupation: Phone Number.

Contact Person:Mrs.Linda Hills
Email:senatehouse106@yahoo.com.hk
Tel:+234 70 622 577 63












^ permalink raw reply

* Re: tun: Use netif_receive_skb instead of netif_rx
From: Herbert Xu @ 2010-05-21  1:02 UTC (permalink / raw)
  To: Neil Horman; +Cc: Eric Dumazet, David Miller, bmb, tgraf, nhorman, netdev
In-Reply-To: <20100521003939.GA2223@localhost.localdomain>

On Thu, May 20, 2010 at 08:39:39PM -0400, Neil Horman wrote:
>
> > Not very surprising since tun.c doesn't go through the normal
> > socket interface.  I'll send a additional patch for that.
> > 
> I don't think thats it.  I think its a chicken and egg situation.  I think the
> problem is that tasks can't be assigned to cgroups until their created, and in
> that time a sock can be created.  Its a natural race.  If you create a socket
> before you assign it to a cgroup, that socket retains a classid of zero.  I'm
> going to try modify the patch to update sockets owned by tasks when the cgroup
> is assigned.

That's what I meant above.  My patch will make tun.c do the
classid update every time it sends out a packet.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: tun: Use netif_receive_skb instead of netif_rx
From: Herbert Xu @ 2010-05-21  1:16 UTC (permalink / raw)
  To: Neil Horman; +Cc: Eric Dumazet, David Miller, bmb, tgraf, nhorman, netdev
In-Reply-To: <20100521010211.GA23671@gondor.apana.org.au>

On Fri, May 21, 2010 at 11:02:11AM +1000, Herbert Xu wrote:
> 
> That's what I meant above.  My patch will make tun.c to the
> classid update every time it sends out a packet.

Here it is:

tun: Update classid on packet injection

This patch makes tun update its socket classid every time we
inject a packet into the network stack.  This is so that any
updates made by the admin to the process writing packets to
tun take effect.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4326520..a8a9aa8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -525,6 +525,8 @@ static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 	struct sk_buff *skb;
 	int err;
 
+	sock_update_classid(sk);
+
 	/* Under a page?  Don't bother with paged skb. */
 	if (prepad + len < PAGE_SIZE || !linear)
 		linear = len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f7fdf8..4969bd1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1055,6 +1055,7 @@ void sock_update_classid(struct sock *sk)
 	if (classid && classid != sk->sk_classid)
 		sk->classid = classid;
 }
+EXPORT_SYMBOL(sock_update_classid);
 #endif
 
 /**

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox