All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cyrill Gorcunov <gorcunov@gmail.com>
To: NETDEV <netdev@vger.kernel.org>
Cc: Solar Designer <solar@openwall.com>,
	Vasily Averin <vvs@virtuozzo.com>,
	Andrey Vagin <avagin@virtuozzo.com>,
	Pavel Emelianov <xemul@virtuozzo.com>,
	Vladimir Davydov <vdavydov@virtuozzo.com>,
	Konstantin Khorenko <khorenko@virtuozzo.com>,
	David Miller <davem@davemloft.net>,
	Eric Dumazet <eric.dumazet@gmail.com>
Subject: [RFC] net: ipv4 -- Introduce ifa limit per net
Date: Sat, 5 Mar 2016 00:39:20 +0300	[thread overview]
Message-ID: <20160304213920.GJ4184@uranus.lan> (raw)

Currently all kernels (including vanilla) free the ifa
list with rtnl_lock() held, which takes a huge amount of
time to release all entries when a container is stopped.
Moreover, an unlimited number of addresses can be created
from inside a net namespace if CAP_NET_ADMIN is granted
(which is common for containers).

Let's introduce a per-net limit on the number of
addresses (4096 by default), which can be tuned via the
sysctl entry /proc/sys/net/ipv4/ifa_limit.

Reported-by: Solar Designer <solar@openwall.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
CC: Vasily Averin <vvs@virtuozzo.com>
CC: Andrey Vagin <avagin@virtuozzo.com>
CC: Pavel Emelianov <xemul@virtuozzo.com>
CC: Vladimir Davydov <vdavydov@virtuozzo.com>
CC: Konstantin Khorenko <khorenko@virtuozzo.com>
CC: David Miller <davem@davemloft.net>
CC: Eric Dumazet <eric.dumazet@gmail.com>
---

Please share your ideas if a more elegant way to fix this
problem exists; maybe I am missing something obvious. Thanks!

 include/net/netns/ipv4.h   |    3 +++
 net/ipv4/devinet.c         |   34 +++++++++++++++++++---------------
 net/ipv4/sysctl_net_ipv4.c |    8 ++++++++
 3 files changed, 30 insertions(+), 15 deletions(-)

Index: linux-ml.git/include/net/netns/ipv4.h
===================================================================
--- linux-ml.git.orig/include/net/netns/ipv4.h
+++ linux-ml.git/include/net/netns/ipv4.h
@@ -77,6 +77,8 @@ struct netns_ipv4 {
 
 	struct local_ports ip_local_ports;
 
+	int sysctl_ifa_limit;
+
 	int sysctl_tcp_ecn;
 	int sysctl_tcp_ecn_fallback;
 
@@ -101,6 +103,7 @@ struct netns_ipv4 {
 	struct ping_group_range ping_group_range;
 
 	atomic_t dev_addr_genid;
+	atomic_t ifa_nr;
 
 #ifdef CONFIG_SYSCTL
 	unsigned long *sysctl_local_reserved_ports;
Index: linux-ml.git/net/ipv4/devinet.c
===================================================================
--- linux-ml.git.orig/net/ipv4/devinet.c
+++ linux-ml.git/net/ipv4/devinet.c
@@ -194,8 +194,11 @@ static void devinet_sysctl_unregister(st
 
 /* Locks all the inet devices. */
 
-static struct in_ifaddr *inet_alloc_ifa(void)
+static struct in_ifaddr *inet_alloc_ifa(struct net *net)
 {
+	if (atomic_add_return(1, &net->ipv4.ifa_nr) >
+	    net->ipv4.sysctl_ifa_limit)
+		return NULL;
 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 }
 
@@ -207,8 +210,9 @@ static void inet_rcu_free_ifa(struct rcu
 	kfree(ifa);
 }
 
-static void inet_free_ifa(struct in_ifaddr *ifa)
+static void inet_free_ifa(struct net *net, struct in_ifaddr *ifa)
 {
+	atomic_dec(&net->ipv4.ifa_nr);
 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 }
 
@@ -296,7 +300,7 @@ static void inetdev_destroy(struct in_de
 
 	while ((ifa = in_dev->ifa_list) != NULL) {
 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
-		inet_free_ifa(ifa);
+		inet_free_ifa(dev_net(dev), ifa);
 	}
 
 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
@@ -361,7 +365,7 @@ static void __inet_del_ifa(struct in_dev
 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
 				blocking_notifier_call_chain(&inetaddr_chain,
 						NETDEV_DOWN, ifa);
-				inet_free_ifa(ifa);
+				inet_free_ifa(dev_net(in_dev->dev), ifa);
 			} else {
 				promote = ifa;
 				break;
@@ -420,7 +424,7 @@ static void __inet_del_ifa(struct in_dev
 
 	}
 	if (destroy)
-		inet_free_ifa(ifa1);
+		inet_free_ifa(dev_net(in_dev->dev), ifa1);
 }
 
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -442,7 +446,7 @@ static int __inet_insert_ifa(struct in_i
 	ASSERT_RTNL();
 
 	if (!ifa->ifa_local) {
-		inet_free_ifa(ifa);
+		inet_free_ifa(dev_net(in_dev->dev), ifa);
 		return 0;
 	}
 
@@ -457,11 +461,11 @@ static int __inet_insert_ifa(struct in_i
 		if (ifa1->ifa_mask == ifa->ifa_mask &&
 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
 			if (ifa1->ifa_local == ifa->ifa_local) {
-				inet_free_ifa(ifa);
+				inet_free_ifa(dev_net(in_dev->dev), ifa);
 				return -EEXIST;
 			}
 			if (ifa1->ifa_scope != ifa->ifa_scope) {
-				inet_free_ifa(ifa);
+				inet_free_ifa(dev_net(in_dev->dev), ifa);
 				return -EINVAL;
 			}
 			ifa->ifa_flags |= IFA_F_SECONDARY;
@@ -502,7 +506,7 @@ static int inet_set_ifa(struct net_devic
 	ASSERT_RTNL();
 
 	if (!in_dev) {
-		inet_free_ifa(ifa);
+		inet_free_ifa(dev_net(dev), ifa);
 		return -ENOBUFS;
 	}
 	ipv4_devconf_setall(in_dev);
@@ -768,7 +772,7 @@ static struct in_ifaddr *rtm_to_ifaddr(s
 	if (!in_dev)
 		goto errout;
 
-	ifa = inet_alloc_ifa();
+	ifa = inet_alloc_ifa(net);
 	if (!ifa)
 		/*
 		 * A potential indev allocation can be left alive, it stays
@@ -817,7 +821,7 @@ static struct in_ifaddr *rtm_to_ifaddr(s
 	return ifa;
 
 errout_free:
-	inet_free_ifa(ifa);
+	inet_free_ifa(net, ifa);
 errout:
 	return ERR_PTR(err);
 }
@@ -865,13 +869,13 @@ static int inet_rtm_newaddr(struct sk_bu
 					       true, ifa);
 
 			if (ret < 0) {
-				inet_free_ifa(ifa);
+				inet_free_ifa(net, ifa);
 				return ret;
 			}
 		}
 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 	} else {
-		inet_free_ifa(ifa);
+		inet_free_ifa(net, ifa);
 
 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1055,7 +1059,7 @@ int devinet_ioctl(struct net *net, unsig
 
 		if (!ifa) {
 			ret = -ENOBUFS;
-			ifa = inet_alloc_ifa();
+			ifa = inet_alloc_ifa(net);
 			if (!ifa)
 				break;
 			INIT_HLIST_NODE(&ifa->hash);
@@ -1408,7 +1412,7 @@ static int inetdev_event(struct notifier
 		if (!inetdev_valid_mtu(dev->mtu))
 			break;
 		if (dev->flags & IFF_LOOPBACK) {
-			struct in_ifaddr *ifa = inet_alloc_ifa();
+			struct in_ifaddr *ifa = inet_alloc_ifa(dev_net(dev));
 
 			if (ifa) {
 				INIT_HLIST_NODE(&ifa->hash);
Index: linux-ml.git/net/ipv4/sysctl_net_ipv4.c
===================================================================
--- linux-ml.git.orig/net/ipv4/sysctl_net_ipv4.c
+++ linux-ml.git/net/ipv4/sysctl_net_ipv4.c
@@ -960,6 +960,13 @@ static struct ctl_table ipv4_net_table[]
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+	{
+		.procname	= "ifa_limit",
+		.data		= &init_net.ipv4.sysctl_ifa_limit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
@@ -988,6 +995,7 @@ static __net_init int ipv4_sysctl_init_n
 	if (!net->ipv4.sysctl_local_reserved_ports)
 		goto err_ports;
 
+	net->ipv4.sysctl_ifa_limit = 4096;
 	return 0;
 
 err_ports:

             reply	other threads:[~2016-03-04 21:39 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-04 21:39 Cyrill Gorcunov [this message]
2016-03-04 22:50 ` [RFC] net: ipv4 -- Introduce ifa limit per net David Miller
2016-03-05  0:08   ` Eric Dumazet
2016-03-05  4:11     ` David Miller
2016-03-05  7:18       ` Cyrill Gorcunov
2016-03-05 15:57       ` Cyrill Gorcunov
2016-03-05 16:33         ` David Miller
2016-03-05 17:00           ` Cyrill Gorcunov
2016-03-05 18:44           ` Cyrill Gorcunov
2016-03-06 10:09             ` Cyrill Gorcunov
2016-03-06 16:23               ` Eric Dumazet
2016-03-06 17:06                 ` Cyrill Gorcunov
2016-03-09 16:39                   ` Cyrill Gorcunov
2016-03-09 16:51                     ` Cyrill Gorcunov
2016-03-09 16:58                     ` Alexei Starovoitov
2016-03-09 17:09                       ` Cyrill Gorcunov
2016-03-09 17:24                         ` David Miller
2016-03-09 17:53                           ` Cyrill Gorcunov
2016-03-09 19:55                             ` Cyrill Gorcunov
2016-03-09 20:27                             ` David Miller
2016-03-09 20:41                               ` Cyrill Gorcunov
2016-03-09 20:47                                 ` David Miller
2016-03-09 20:57                                   ` Cyrill Gorcunov
2016-03-09 21:10                                     ` David Miller
2016-03-09 21:16                                       ` Cyrill Gorcunov
2016-03-10 10:20                                         ` Cyrill Gorcunov
2016-03-10 11:03                                           ` Cyrill Gorcunov
2016-03-10 15:09                                             ` Cyrill Gorcunov
2016-03-10 18:01                                               ` David Miller
2016-03-10 18:48                                                 ` Cyrill Gorcunov
2016-03-10 19:02                                                 ` Cong Wang
2016-03-10 19:55                                                   ` David Miller
2016-03-10 20:01                                                     ` Cyrill Gorcunov
2016-03-10 20:03                                                       ` David Miller
2016-03-10 20:13                                                         ` Cyrill Gorcunov
2016-03-10 20:19                                                           ` Cyrill Gorcunov
2016-03-10 21:05                                                           ` David Miller
2016-03-10 21:19                                                             ` Cyrill Gorcunov
2016-03-10 21:59                                                               ` Cyrill Gorcunov
2016-03-10 22:36                                                                 ` David Miller
2016-03-10 22:40                                                                   ` Cyrill Gorcunov
2016-03-11 20:40                                                                     ` David Miller
2016-03-11 20:58                                                                       ` Florian Westphal
2016-03-11 21:00                                                                       ` Cyrill Gorcunov
2016-03-11 21:22                                                                       ` Cyrill Gorcunov
2016-03-11 21:59                                                                         ` Cyrill Gorcunov
2016-03-14  3:29                                                                           ` David Miller
2016-03-10 21:09                                                     ` Cong Wang
2016-03-09 17:19                     ` David Miller
2016-03-05  6:58   ` Cyrill Gorcunov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160304213920.GJ4184@uranus.lan \
    --to=gorcunov@gmail.com \
    --cc=avagin@virtuozzo.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=khorenko@virtuozzo.com \
    --cc=netdev@vger.kernel.org \
    --cc=solar@openwall.com \
    --cc=vdavydov@virtuozzo.com \
    --cc=vvs@virtuozzo.com \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.