Linux Container Development
 help / color / mirror / Atom feed
From: Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
To: "Denis V. Lunev" <den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
Cc: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org
Subject: Re: [PATCH] [NETNS49] support for per/namespace routing cache cleanup
Date: Wed, 17 Oct 2007 13:46:23 +0200	[thread overview]
Message-ID: <4715F60F.6060304@fr.ibm.com> (raw)
In-Reply-To: <20071017111215.GA29653-aPCOdVxUTlgvJsYlp49lxw@public.gmane.org>

Denis V. Lunev wrote:
> /proc/sys/net/route/flush should be accessible inside the net namespace.
> Though, the complete opening of this file will result in a DoS or
> significant entire host slowdown if a namespace process will continually
> flush routes.
> 
> This patch introduces per/namespace route flush facility.
> 
> Each namespace wanted to flush a cache copies global generation count to
> itself and starts the timer. The cache is dropped for a specific namespace
> iff the namespace counter is greater or equal global ones.
> 
> So, in general, unwanted namespaces do nothing. They hold very old low
> counter and they are unaffected by the requested cleanup.
> 
> Signed-of-by: Denis V. Lunev <den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>

That's right, and that will happen when manipulating the IP addresses of
the network devices too. But I am not comfortable with your patchset. It
touches the routing flush function too heavily and it uses
current->nsproxy->net_ns.

IMHO we should have two flush functions: one taking a network namespace
parameter and one without the network namespace parameter. The first one
is called when a write to /proc/sys/net/ipv4/route/flush is done (we
must use the network namespace of the writer) or when an interface
address is changed or an interface is brought down or up. The second one
is called by the timer, so we have a global timer flushing the routing
cache for all the namespaces.


> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 85abf14..b492ce8 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -143,6 +143,8 @@ struct net {
> 
>  	/* iptable_filter.c */
>  	struct xt_table		*ip_packet_filter;
> +
> +	unsigned long		rt_flush_required;
>  };
> 
>  extern struct net init_net;
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index f9a59ff..413587c 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -91,6 +91,7 @@
>  #include <linux/jhash.h>
>  #include <linux/rcupdate.h>
>  #include <linux/times.h>
> +#include <linux/nsproxy.h>
>  #include <net/net_namespace.h>
>  #include <net/protocol.h>
>  #include <net/ip.h>
> @@ -642,6 +643,51 @@ static void rt_check_expire(unsigned long dummy)
>  	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
>  }
> 
> +
> +static DEFINE_SPINLOCK(rt_flush_lock);
> +
> +#ifdef CONFIG_NET_NS
> +static unsigned long rt_flush_gen;
> +
> +/* called under rt_flush_lock */
> +static void rt_flush_required_set(struct net *net)
> +{
> +	/*
> +	 * If the global generation rt_flush_gen is equal to G, then
> +	 * the pass considering entries labelled by G is yet to come.
> +	 */
> +	net->rt_flush_required = rt_flush_gen;
> +}
> +
> +static unsigned long rt_flush_required_reset(void)
> +{
> +	unsigned long g;
> +
> +	spin_lock_bh(&rt_flush_lock);
> +	g = rt_flush_gen++;
> +	spin_unlock_bh(&rt_flush_lock);
> +	return g;
> +}
> +
> +static int rt_flush_required_check(struct net *net, unsigned long gen)
> +{
> +	/* can be checked without the lock */
> +	return net->rt_flush_required >= gen;
> +}
> +
> +#else
> +
> +static void rt_flush_required_reset(struct net *net)
> +{
> +}
> +
> +static unsigned long rt_flush_required_reset(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +
>  /* This can run from both BH and non-BH contexts, the latter
>   * in the case of a forced flush event.
>   */
> @@ -649,16 +695,46 @@ static void rt_run_flush(unsigned long dummy)
>  {
>  	int i;
>  	struct rtable *rth, *next;
> +	unsigned long gen;
> 
>  	rt_deadline = 0;
> 
>  	get_random_bytes(&rt_hash_rnd, 4);
> +	gen = rt_flush_required_reset();
> 
>  	for (i = rt_hash_mask; i >= 0; i--) {
> +#ifdef CONFIG_NET_NS
> +		struct rtable **prev, *p, *tail;
> +
> +		spin_lock_bh(rt_hash_lock_addr(i));
> +		rth = rt_hash_table[i].chain;
> +
> +		/* defer releasing the head of the list after spin_unlock */
> +		for (tail = rth; tail; tail = tail->u.dst.rt_next)
> +			if (!rt_flush_required_check(tail->fl.fl_net, gen))
> +				break;
> +		if (rth != tail)
> +			rt_hash_table[i].chain = tail;
> +
> +		/* call rt_free on entries after the tail requiring flush */
> +		prev = &rt_hash_table[i].chain;
> +		for (p = *prev; p; p = next) {
> +			next = p->u.dst.rt_next;
> +			if (!rt_flush_required_check(p->fl.fl_net, gen)) {
> +				prev = &p->u.dst.rt_next;
> +			} else {
> +				*prev = next;
> +				rt_free(p);
> +			}
> +		}
> +#else
>  		spin_lock_bh(rt_hash_lock_addr(i));
>  		rth = rt_hash_table[i].chain;
>  		if (rth)
>  			rt_hash_table[i].chain = NULL;
> +		tail = NULL;
> +
> +#endif
>  		spin_unlock_bh(rt_hash_lock_addr(i));
> 
>  		for (; rth; rth = next) {
> @@ -668,8 +744,6 @@ static void rt_run_flush(unsigned long dummy)
>  	}
>  }
> 
> -static DEFINE_SPINLOCK(rt_flush_lock);
> -
>  void rt_cache_flush(int delay)
>  {
>  	unsigned long now = jiffies;
> @@ -697,6 +771,8 @@ void rt_cache_flush(int delay)
>  			delay = tmo;
>  	}
> 
> +	rt_flush_required_set(current->nsproxy->net_ns);
> +
>  	if (delay <= 0) {
>  		spin_unlock_bh(&rt_flush_lock);
>  		rt_run_flush(0);

  parent reply	other threads:[~2007-10-17 11:46 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-17 11:12 [PATCH] [NETNS49] support for per/namespace routing cache cleanup Denis V. Lunev
     [not found] ` <20071017111215.GA29653-aPCOdVxUTlgvJsYlp49lxw@public.gmane.org>
2007-10-17 11:46   ` Daniel Lezcano [this message]
     [not found]     ` <4715F60F.6060304-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-17 12:51       ` Denis V. Lunev
     [not found]         ` <4716055D.4010102-3ImXcnM4P+0@public.gmane.org>
2007-10-17 13:40           ` Daniel Lezcano
     [not found]             ` <471610E8.8020008-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-17 14:10               ` Denis V. Lunev
     [not found]                 ` <471617CA.9090901-3ImXcnM4P+0@public.gmane.org>
2007-10-17 14:46                   ` Daniel Lezcano
2007-10-17 15:05                   ` Daniel Lezcano
     [not found]                     ` <471624C0.9020108-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-17 17:56                       ` Denis V. Lunev
     [not found]                         ` <47164CBD.3040107-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2007-10-17 18:50                           ` Eric W. Biederman
2007-10-18  7:18                           ` Benjamin Thery
     [not found]                             ` <471708D8.3080808-6ktuUTfB/bM@public.gmane.org>
2007-10-18  9:54                               ` Denis V. Lunev
2007-10-18 14:51                               ` Denis V. Lunev
     [not found]                                 ` <471772EB.2060206-3ImXcnM4P+0@public.gmane.org>
2007-10-18 16:29                                   ` Benjamin Thery
     [not found]                                     ` <471789FD.5020802-6ktuUTfB/bM@public.gmane.org>
2007-10-18 19:01                                       ` Denis V. Lunev
2007-10-18 19:05                                       ` Denis V. Lunev
     [not found]                                         ` <4717AE7D.1060000-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2007-10-19  7:39                                           ` Daniel Lezcano
2007-10-19  7:39                                   ` Daniel Lezcano
     [not found]                                     ` <47185F2C.1070404-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-19  8:53                                       ` Denis V. Lunev
     [not found]                                         ` <47187074.3090206-3ImXcnM4P+0@public.gmane.org>
2007-10-19 19:03                                           ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4715F60F.6060304@fr.ibm.com \
    --to=dlezcano-nmtc/0zbporqt0dzr+alfa@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org \
    --cc=ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox