All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Mishin <dim@openvz.org>
To: containers@lists.osdl.org
Cc: alexey@sw.ru, saw@sw.ru, Andrew Morton <akpm@osdl.org>,
	netdev@vger.kernel.org
Subject: [PATCH 3/12] L2 network namespace (v3): loopback device virtualization
Date: Wed, 17 Jan 2007 19:00:32 +0300	[thread overview]
Message-ID: <200701171900.32906.dim@openvz.org> (raw)
In-Reply-To: <200701171851.14734.dim@openvz.org>

Added per-namespace network loopback device

Signed-off-by: Dmitry Mishin <dim@openvz.org>

---
 drivers/net/loopback.c        |  112 +++++++++++++++++++++++++++++-------------
 include/linux/net_namespace.h |    2 
 include/linux/netdevice.h     |    6 +-
 net/core/dev.c                |    3 -
 net/core/net_namespace.c      |   16 ++++++
 net/ipv4/devinet.c            |    2 
 net/ipv6/addrconf.c           |    2 
 net/ipv6/route.c              |    6 +-
 8 files changed, 108 insertions(+), 41 deletions(-)

--- linux-2.6.20-rc4-mm1.net_ns.orig/drivers/net/loopback.c
+++ linux-2.6.20-rc4-mm1.net_ns/drivers/net/loopback.c
@@ -62,7 +62,12 @@ struct pcpu_lstats {
 	unsigned long packets;
 	unsigned long bytes;
 };
-static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
+#ifndef CONFIG_NET_NS
+struct pcpu_lstats		*pcpu_lstats_p;
+#define pcpu_lstats_ptr(ns)		pcpu_lstats_p
+#else
+#define pcpu_lstats_ptr(ns)		(ns->pcpu_lstats_p)
+#endif
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
@@ -154,7 +159,8 @@ static int loopback_xmit(struct sk_buff 
 	dev->last_rx = jiffies;
 
 	/* it's OK to use __get_cpu_var() because BHs are off */
-	lb_stats = &__get_cpu_var(pcpu_lstats);
+	lb_stats = per_cpu_ptr(pcpu_lstats_ptr(dev->net_ns),
+							smp_processor_id());
 	lb_stats->bytes += skb->len;
 	lb_stats->packets++;
 
@@ -163,11 +169,9 @@ static int loopback_xmit(struct sk_buff 
 	return 0;
 }
 
-static struct net_device_stats loopback_stats;
-
 static struct net_device_stats *get_stats(struct net_device *dev)
 {
-	struct net_device_stats *stats = &loopback_stats;
+	struct net_device_stats *stats = netdev_priv(dev);
 	unsigned long bytes = 0;
 	unsigned long packets = 0;
 	int i;
@@ -175,7 +179,7 @@ static struct net_device_stats *get_stat
 	for_each_possible_cpu(i) {
 		const struct pcpu_lstats *lb_stats;
 
-		lb_stats = &per_cpu(pcpu_lstats, i);
+		lb_stats = per_cpu_ptr(pcpu_lstats_ptr(dev->net_ns), i);
 		bytes   += lb_stats->bytes;
 		packets += lb_stats->packets;
 	}
@@ -200,40 +204,80 @@ static const struct ethtool_ops loopback
 	.get_rx_csum		= always_on,
 };
 
-/*
- * The loopback device is special. There is only one instance and
- * it is statically allocated. Don't do this for other devices.
- */
-struct net_device loopback_dev = {
-	.name	 		= "lo",
-	.get_stats		= &get_stats,
-	.priv			= &loopback_stats,
-	.mtu			= (16 * 1024) + 20 + 20 + 12,
-	.hard_start_xmit	= loopback_xmit,
-	.hard_header		= eth_header,
-	.hard_header_cache	= eth_header_cache,
-	.header_cache_update	= eth_header_cache_update,
-	.hard_header_len	= ETH_HLEN,	/* 14	*/
-	.addr_len		= ETH_ALEN,	/* 6	*/
-	.tx_queue_len		= 0,
-	.type			= ARPHRD_LOOPBACK,	/* 0x0001*/
-	.rebuild_header		= eth_rebuild_header,
-	.flags			= IFF_LOOPBACK,
-	.features 		= NETIF_F_SG | NETIF_F_FRAGLIST
+struct net_device *loopback_dev_p;
+EXPORT_SYMBOL(loopback_dev_p);
+
+struct pcpu_lstats *pcpu_lstats_p;
+EXPORT_SYMBOL(pcpu_lstats_p);
+
+
+void loopback_dev_dtor(struct net_device *dev)
+{
+	free_percpu(pcpu_lstats_ptr(dev->net_ns));
+	free_netdev(dev);
+}
+
+void loopback_dev_ctor(struct net_device *dev)
+{
+	dev->mtu		= (16 * 1024) + 20 + 20 + 12;
+	dev->hard_start_xmit	= loopback_xmit;
+	dev->hard_header	= eth_header;
+	dev->hard_header_cache	= eth_header_cache;
+	dev->header_cache_update = eth_header_cache_update;
+	dev->hard_header_len	= ETH_HLEN;	/* 14	*/
+	dev->addr_len		= ETH_ALEN;	/* 6	*/
+	dev->tx_queue_len	= 0;
+	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
+	dev->rebuild_header	= eth_rebuild_header;
+	dev->flags		= IFF_LOOPBACK;
+	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
 #ifdef LOOPBACK_TSO
 				  | NETIF_F_TSO
 #endif
 				  | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA
-				  | NETIF_F_LLTX,
-	.ethtool_ops		= &loopback_ethtool_ops,
-};
+				  | NETIF_F_LLTX;
+	dev->ethtool_ops	= &loopback_ethtool_ops;
+	dev->get_stats		= &get_stats;
+	dev->destructor		= loopback_dev_dtor;
+}
 
 /* Setup and register the loopback device. */
-static int __init loopback_init(void)
+int loopback_init(void)
 {
-	return register_netdev(&loopback_dev);
-};
+	struct net_namespace *ns = current_net_ns;
+	int err;
 
-module_init(loopback_init);
+	err = -ENOMEM;
+	loopback_dev_ptr = alloc_netdev(sizeof(struct net_device_stats), "lo",
+							loopback_dev_ctor);
+	if (loopback_dev_ptr == NULL)
+		goto out;
+	pcpu_lstats_ptr(ns) = alloc_percpu(struct pcpu_lstats);
+	if (pcpu_lstats_ptr(ns) == NULL)
+		goto out_stats;
+	/*
+	 * loopback device doesn't hold active reference: it doesn't prevent
+	 * stopping of net_namespace. So, just put old namespace.
+	 */
+#ifdef CONFIG_NET_NS
+	put_net_ns(loopback_dev_ptr->net_ns);
+	loopback_dev_ptr->net_ns = ns;
+#endif
+	err = register_netdev(loopback_dev_ptr);
+	if (err)
+		goto out_register;
+	return 0;
 
-EXPORT_SYMBOL(loopback_dev);
+out_register:
+	free_percpu(ns->pcpu_lstats_p);
+#ifdef CONFIG_NET_NS
+	/* in order to avoid put in free_netdev() */
+	loopback_dev_ptr->net_ns = NULL;
+#endif
+out_stats:
+	free_netdev(loopback_dev_ptr);
+out:
+	return err;
+}
+
+module_init(loopback_init);
--- linux-2.6.20-rc4-mm1.net_ns.orig/include/linux/net_namespace.h
+++ linux-2.6.20-rc4-mm1.net_ns/include/linux/net_namespace.h
@@ -9,6 +9,8 @@
 struct net_namespace {
 	struct kref		kref;
 	struct net_device	*dev_base_p, **dev_tail_p;
+	struct net_device	*loopback_dev_p;
+	struct pcpu_lstats	*pcpu_lstats_p;
 	unsigned int		hash;
 };
 
--- linux-2.6.20-rc4-mm1.net_ns.orig/include/linux/netdevice.h
+++ linux-2.6.20-rc4-mm1.net_ns/include/linux/netdevice.h
@@ -570,16 +570,20 @@ struct packet_type {
 #include <linux/notifier.h>
 #include <linux/net_namespace.h>
 
-extern struct net_device		loopback_dev;		/* The loopback */
+extern struct net_device		*loopback_dev_p;
 #ifndef CONFIG_NET_NS
+#define loopback_dev_ptr		loopback_dev_p
 extern struct net_device		*dev_base;		/* All devices */
 #define dev_base_ns(dev)		dev_base
 #else
+#define loopback_dev_ptr		(current_net_ns->loopback_dev_p)
 #define dev_base			(current_net_ns->dev_base_p)
 #define dev_base_ns(dev)		(dev->net_ns->dev_base_p)
 #endif
+#define loopback_dev			(*loopback_dev_ptr)	/* The loopback */
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
+extern int			loopback_init(void);
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
 extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
--- linux-2.6.20-rc4-mm1.net_ns.orig/net/core/dev.c
+++ linux-2.6.20-rc4-mm1.net_ns/net/core/dev.c
@@ -3253,7 +3253,8 @@ void free_netdev(struct net_device *dev)
 
 	ns = dev->net_ns;
 	if (ns != NULL) {
-		put_net_ns(ns);
+		if (dev != ns->loopback_dev_p)
+			put_net_ns(ns);
 		dev->net_ns = NULL;
 	}
 #endif
--- linux-2.6.20-rc4-mm1.net_ns.orig/net/core/net_namespace.c
+++ linux-2.6.20-rc4-mm1.net_ns/net/core/net_namespace.c
@@ -10,6 +10,7 @@
 #include <linux/nsproxy.h>
 #include <linux/net_namespace.h>
 #include <linux/net.h>
+#include <linux/netdevice.h>
 
 struct net_namespace init_net_ns = {
 	.kref = {
@@ -17,6 +18,8 @@ struct net_namespace init_net_ns = {
 	},
 	.dev_base_p	= NULL,
 	.dev_tail_p	= &init_net_ns.dev_base_p,
+	.loopback_dev_p	= NULL,
+	.pcpu_lstats_p	= NULL,
 };
 
 #ifdef CONFIG_NET_NS
@@ -41,7 +44,19 @@ static struct net_namespace *clone_net_n
 	ns->dev_base_p = NULL;
 	ns->dev_tail_p = &ns->dev_base_p;
 	ns->hash = net_random();
+
+	if ((push_net_ns(ns)) != old_ns)
+		BUG();
+	if (loopback_init())
+		goto out_loopback;
+	pop_net_ns(old_ns);
 	return ns;
+
+out_loopback:
+	pop_net_ns(old_ns);
+	BUG_ON(atomic_read(&ns->kref.refcount) != 1);
+	kfree(ns);
+	return NULL;
 }
 
 /*
@@ -79,6 +94,7 @@ void free_net_ns(struct kref *kref)
 	struct net_namespace *ns;
 
 	ns = container_of(kref, struct net_namespace, kref);
+	unregister_netdev(ns->loopback_dev_p);
 	if (ns->dev_base_p != NULL) {
 		printk("NET_NS: BUG: namespace %p has devices! ref %d\n",
 				ns, atomic_read(&ns->kref.refcount));
--- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/devinet.c
+++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/devinet.c
@@ -198,7 +198,7 @@ static void inetdev_destroy(struct in_de
 	ASSERT_RTNL();
 
 	dev = in_dev->dev;
-	if (dev == &loopback_dev)
+	if (dev == loopback_dev_p)
 		return;
 
 	in_dev->dead = 1;
--- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv6/addrconf.c
+++ linux-2.6.20-rc4-mm1.net_ns/net/ipv6/addrconf.c
@@ -2360,7 +2360,7 @@ static int addrconf_ifdown(struct net_de
 
 	ASSERT_RTNL();
 
-	if (dev == &loopback_dev && how == 1)
+	if (dev == loopback_dev_p && how == 1)
 		how = 0;
 
 	rt6_ifdown(dev);
--- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv6/route.c
+++ linux-2.6.20-rc4-mm1.net_ns/net/ipv6/route.c
@@ -124,7 +124,6 @@ struct rt6_info ip6_null_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -ENETUNREACH,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -150,7 +149,6 @@ struct rt6_info ip6_prohibit_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -EACCES,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -170,7 +168,6 @@ struct rt6_info ip6_blk_hole_entry = {
 		.dst = {
 			.__refcnt	= ATOMIC_INIT(1),
 			.__use		= 1,
-			.dev		= &loopback_dev,
 			.obsolete	= -1,
 			.error		= -EINVAL,
 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
@@ -2469,7 +2466,10 @@ void __init ip6_route_init(void)
 #endif
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_init();
+	ip6_prohibit_entry.u.dst.dev = &loopback_dev;
+	ip6_blk_hole_entry.u.dst.dev = &loopback_dev;
 #endif
+	ip6_null_entry.u.dst.dev = &loopback_dev;
 }
 
 void ip6_route_cleanup(void)

  parent reply	other threads:[~2007-01-17 16:02 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-01-17 15:51 [PATCH 0/12] L2 network namespace (v3) Dmitry Mishin
2007-01-17 15:57 ` [PATCH 1/12] L2 network namespace (v3): current network namespace operations Dmitry Mishin
2007-01-17 20:16   ` Eric W. Biederman
2007-01-18 10:56     ` Dmitry Mishin
2007-01-18 13:37       ` Eric W. Biederman
2007-01-25  7:58       ` Eric W. Biederman
2007-01-17 15:58 ` [PATCH 0/12] L2 network namespace (v3) Cedric Le Goater
2007-01-17 15:59 ` [PATCH 2/12] L2 network namespace (v3): network devices virtualization Dmitry Mishin
2007-01-17 16:00 ` Dmitry Mishin [this message]
2007-01-17 16:01 ` [PATCH 4/12] L2 network namespace (v3): devinet sysctl's checks Dmitry Mishin
2007-01-17 16:03 ` [PATCH 5/12] L2 network namespace (v3): IPv4 routing Dmitry Mishin
2007-01-17 16:05 ` [PATCH 6/12] L2 network namespace (v3): socket hashes Dmitry Mishin
2007-01-17 16:10 ` [PATCH 0/12] L2 network namespace (v3) Daniel Lezcano
2007-01-17 16:10 ` [PATCH 7/12] allow proc_dir_entries to have destructor Dmitry Mishin
2007-01-17 16:11 ` [PATCH 8/12] net_device seq_file Dmitry Mishin
2007-01-17 20:36   ` Stephen Hemminger
2007-01-18 17:07     ` Eric W. Biederman
2007-01-17 16:14 ` [PATCH 9/12] L2 network namespace (v3): device to pass packets between namespaces Dmitry Mishin
2007-01-17 16:15 ` [PATCH 10/12] L2 network namespace (v3): playing with pass-through device Dmitry Mishin
2007-01-17 16:16 ` [PATCH 11/12] L2 network namespace (v3): sockets proc view virtualization Dmitry Mishin
2007-01-17 16:18 ` [PATCH 12/12] L2 network namespace (v3): L3 network namespace intro Dmitry Mishin
2007-01-19  0:07 ` [PATCH 0/12] L2 network namespace (v3) YOSHIFUJI Hideaki / 吉藤英明
2007-01-19  7:27   ` Eric W. Biederman
2007-01-19  9:35     ` Dmitry Mishin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200701171900.32906.dim@openvz.org \
    --to=dim@openvz.org \
    --cc=akpm@osdl.org \
    --cc=alexey@sw.ru \
    --cc=containers@lists.osdl.org \
    --cc=netdev@vger.kernel.org \
    --cc=saw@sw.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.