All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
To: "Denis V. Lunev" <den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
Cc: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org
Subject: Re: [PATCH] [NETNS49] support for per/namespace routing cache cleanup
Date: Wed, 17 Oct 2007 13:46:23 +0200	[thread overview]
Message-ID: <4715F60F.6060304@fr.ibm.com> (raw)
In-Reply-To: <20071017111215.GA29653-aPCOdVxUTlgvJsYlp49lxw@public.gmane.org>

Denis V. Lunev wrote:
> /proc/sys/net/route/flush should be accessible inside the net namespace.
> Though, the complete opening of this file will result in a DoS or
> significant entire host slowdown if a namespace process will continually
> flush routes.
> 
> This patch introduces per/namespace route flush facility.
> 
> Each namespace wanted to flush a cache copies global generation count to
> itself and starts the timer. The cache is dropped for a specific namespace
> iff the namespace counter is greater or equal global ones.
> 
> So, in general, unwanted namespaces do nothing. They hold very old low
> counter and they are unaffected by the requested cleanup.
> 
> Signed-of-by: Denis V. Lunev <den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>

That's right and that will happen when manipulating ip addresses of the 
network devices too. But I am not confortable with your patchset. It 
touches the routing flush function too hardly and it uses 
current->nsproxy->net_ns.

IMHO we should have two flush functions. One taking a network namespace 
parameter and one without the network namespace parameter. The first one 
is called when a write to /proc/sys/net/ipv4/route/flush is done (we 
must use the network namespace of the writer) or when a interface 
address is changed or shutdown|up. The last one is called by the timer, 
so we have a global timer flushing the routing cache for all the namespaces.


> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 85abf14..b492ce8 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -143,6 +143,8 @@ struct net {
> 
>  	/* iptable_filter.c */
>  	struct xt_table		*ip_packet_filter;
> +
> +	unsigned long		rt_flush_required;
>  };
> 
>  extern struct net init_net;
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index f9a59ff..413587c 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -91,6 +91,7 @@
>  #include <linux/jhash.h>
>  #include <linux/rcupdate.h>
>  #include <linux/times.h>
> +#include <linux/nsproxy.h>
>  #include <net/net_namespace.h>
>  #include <net/protocol.h>
>  #include <net/ip.h>
> @@ -642,6 +643,51 @@ static void rt_check_expire(unsigned long dummy)
>  	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
>  }
> 
> +
> +static DEFINE_SPINLOCK(rt_flush_lock);
> +
> +#ifdef CONFIG_NET_NS
> +static unsigned long rt_flush_gen;
> +
> +/* called under rt_flush_lock */
> +static void rt_flush_required_set(struct net *net)
> +{
> +	/*
> +	 * If the global generation rt_flush_gen is equal to G, then
> +	 * the pass considering entries labelled by G is yet to come.
> +	 */
> +	net->rt_flush_required = rt_flush_gen;
> +}
> +
> +static unsigned long rt_flush_required_reset(void)
> +{
> +	unsigned long g;
> +
> +	spin_lock_bh(&rt_flush_lock);
> +	g = rt_flush_gen++;
> +	spin_unlock_bh(&rt_flush_lock);
> +	return g;
> +}
> +
> +static int rt_flush_required_check(struct net *net, unsigned long gen)
> +{
> +	/* can be checked without the lock */
> +	return net->rt_flush_required >= gen;
> +}
> +
> +#else
> +
> +static void rt_flush_required_reset(struct net *net)
> +{
> +}
> +
> +static unsigned long rt_flush_required_reset(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +
>  /* This can run from both BH and non-BH contexts, the latter
>   * in the case of a forced flush event.
>   */
> @@ -649,16 +695,46 @@ static void rt_run_flush(unsigned long dummy)
>  {
>  	int i;
>  	struct rtable *rth, *next;
> +	unsigned long gen;
> 
>  	rt_deadline = 0;
> 
>  	get_random_bytes(&rt_hash_rnd, 4);
> +	gen = rt_flush_required_reset();
> 
>  	for (i = rt_hash_mask; i >= 0; i--) {
> +#ifdef CONFIG_NET_NS
> +		struct rtable **prev, *p, *tail;
> +
> +		spin_lock_bh(rt_hash_lock_addr(i));
> +		rth = rt_hash_table[i].chain;
> +
> +		/* defer releasing the head of the list after spin_unlock */
> +		for (tail = rth; tail; tail = tail->u.dst.rt_next)
> +			if (!rt_flush_required_check(tail->fl.fl_net, gen))
> +				break;
> +		if (rth != tail)
> +			rt_hash_table[i].chain = tail;
> +
> +		/* call rt_free on entries after the tail requiring flush */
> +		prev = &rt_hash_table[i].chain;
> +		for (p = *prev; p; p = next) {
> +			next = p->u.dst.rt_next;
> +			if (!rt_flush_required_check(p->fl.fl_net, gen)) {
> +				prev = &p->u.dst.rt_next;
> +			} else {
> +				*prev = next;
> +				rt_free(p);
> +			}
> +		}
> +#else
>  		spin_lock_bh(rt_hash_lock_addr(i));
>  		rth = rt_hash_table[i].chain;
>  		if (rth)
>  			rt_hash_table[i].chain = NULL;
> +		tail = NULL;
> +
> +#endif
>  		spin_unlock_bh(rt_hash_lock_addr(i));
> 
>  		for (; rth; rth = next) {
> @@ -668,8 +744,6 @@ static void rt_run_flush(unsigned long dummy)
>  	}
>  }
> 
> -static DEFINE_SPINLOCK(rt_flush_lock);
> -
>  void rt_cache_flush(int delay)
>  {
>  	unsigned long now = jiffies;
> @@ -697,6 +771,8 @@ void rt_cache_flush(int delay)
>  			delay = tmo;
>  	}
> 
> +	rt_flush_required_set(current->nsproxy->net_ns);
> +
>  	if (delay <= 0) {
>  		spin_unlock_bh(&rt_flush_lock);
>  		rt_run_flush(0);

  parent reply	other threads:[~2007-10-17 11:46 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-17 11:12 [PATCH] [NETNS49] support for per/namespace routing cache cleanup Denis V. Lunev
     [not found] ` <20071017111215.GA29653-aPCOdVxUTlgvJsYlp49lxw@public.gmane.org>
2007-10-17 11:46   ` Daniel Lezcano [this message]
     [not found]     ` <4715F60F.6060304-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-17 12:51       ` Denis V. Lunev
     [not found]         ` <4716055D.4010102-3ImXcnM4P+0@public.gmane.org>
2007-10-17 13:40           ` Daniel Lezcano
     [not found]             ` <471610E8.8020008-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-17 14:10               ` Denis V. Lunev
     [not found]                 ` <471617CA.9090901-3ImXcnM4P+0@public.gmane.org>
2007-10-17 14:46                   ` Daniel Lezcano
2007-10-17 15:05                   ` Daniel Lezcano
     [not found]                     ` <471624C0.9020108-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-17 17:56                       ` Denis V. Lunev
     [not found]                         ` <47164CBD.3040107-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2007-10-17 18:50                           ` Eric W. Biederman
2007-10-18  7:18                           ` Benjamin Thery
     [not found]                             ` <471708D8.3080808-6ktuUTfB/bM@public.gmane.org>
2007-10-18  9:54                               ` Denis V. Lunev
2007-10-18 14:51                               ` Denis V. Lunev
     [not found]                                 ` <471772EB.2060206-3ImXcnM4P+0@public.gmane.org>
2007-10-18 16:29                                   ` Benjamin Thery
     [not found]                                     ` <471789FD.5020802-6ktuUTfB/bM@public.gmane.org>
2007-10-18 19:01                                       ` Denis V. Lunev
2007-10-18 19:05                                       ` Denis V. Lunev
     [not found]                                         ` <4717AE7D.1060000-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2007-10-19  7:39                                           ` Daniel Lezcano
2007-10-19  7:39                                   ` Daniel Lezcano
     [not found]                                     ` <47185F2C.1070404-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-19  8:53                                       ` Denis V. Lunev
     [not found]                                         ` <47187074.3090206-3ImXcnM4P+0@public.gmane.org>
2007-10-19 19:03                                           ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4715F60F.6060304@fr.ibm.com \
    --to=dlezcano-nmtc/0zbporqt0dzr+alfa@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=den-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org \
    --cc=ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.