Re: BUG ? ipip unregister_netdevice_many()

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Daniel Lezcano <daniel.lezcano@free.fr>
To: David Miller <davem@davemloft.net>
Cc: ebiederm@xmission.com, hans.schillstrom@ericsson.com,
	netdev@vger.kernel.org
Subject: Re: BUG ? ipip unregister_netdevice_many()
Date: Thu, 14 Oct 2010 00:16:15 +0200	[thread overview]
Message-ID: <4CB62FAF.1030009@free.fr> (raw)
In-Reply-To: <20101012.130520.48517464.davem@davemloft.net>

On 10/12/2010 10:05 PM, David Miller wrote:
> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Fri, 08 Oct 2010 10:32:40 -0700
>
>    
>> It is just dealing with not flushing the entire routing cache, just the
>> routes that have expired.  Which prevents one network namespace from
>> flushing its routes and DOS'ing another.
>>      
> That's a very indirect and obfuscated way of handling it.
>    

I agree.

> And I still don't know why we let the first contiguous set of expired
> entries in the chain get freed outside of the lock, and the rest
> inside the lock.  That really isn't explained by anything I've read.
>
> How about we just do exactly what's intended, and with no ifdefs?
>    
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>

Dave,

would you mind waiting until I have tested the patch before merging it?
I would like to stress this routine a bit with multiple containers.

Thanks
    -- Daniel

> Signed-off-by: David S. Miller<davem@davemloft.net>
>
> diff --git a/include/net/route.h b/include/net/route.h
> index 7e5e73b..8d24761 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -106,7 +106,7 @@ extern int		ip_rt_init(void);
>   extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
>   				       __be32 src, struct net_device *dev);
>   extern void		rt_cache_flush(struct net *net, int how);
> -extern void		rt_cache_flush_batch(void);
> +extern void		rt_cache_flush_batch(struct net *net);
>   extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
>   extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
>   extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 919f2ad..4039f56 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -999,7 +999,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>   		rt_cache_flush(dev_net(dev), 0);
>   		break;
>   	case NETDEV_UNREGISTER_BATCH:
> -		rt_cache_flush_batch();
> +		rt_cache_flush_batch(dev_net(dev));
>   		break;
>   	}
>   	return NOTIFY_DONE;
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 0755aa4..6ad730c 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -712,13 +712,14 @@ static inline int rt_is_expired(struct rtable *rth)
>    * Can be called by a softirq or a process.
>    * In the later case, we want to be reschedule if necessary
>    */
> -static void rt_do_flush(int process_context)
> +static void rt_do_flush(struct net *net, int process_context)
>   {
>   	unsigned int i;
>   	struct rtable *rth, *next;
> -	struct rtable * tail;
>
>   	for (i = 0; i<= rt_hash_mask; i++) {
> +		struct rtable *list, **pprev;
> +
>   		if (process_context&&  need_resched())
>   			cond_resched();
>   		rth = rt_hash_table[i].chain;
> @@ -726,41 +727,27 @@ static void rt_do_flush(int process_context)
>   			continue;
>
>   		spin_lock_bh(rt_hash_lock_addr(i));
> -#ifdef CONFIG_NET_NS
> -		{
> -		struct rtable ** prev, * p;
>
> -		rth = rt_hash_table[i].chain;
> +		pprev =&rt_hash_table[i].chain;
> +		rth = *pprev;
> +		while (rth) {
> +			next = rth->dst.rt_next;
> +			if (dev_net(rth->dst.dev) == net) {
> +				*pprev = next;
>
> -		/* defer releasing the head of the list after spin_unlock */
> -		for (tail = rth; tail; tail = tail->dst.rt_next)
> -			if (!rt_is_expired(tail))
> -				break;
> -		if (rth != tail)
> -			rt_hash_table[i].chain = tail;
> -
> -		/* call rt_free on entries after the tail requiring flush */
> -		prev =&rt_hash_table[i].chain;
> -		for (p = *prev; p; p = next) {
> -			next = p->dst.rt_next;
> -			if (!rt_is_expired(p)) {
> -				prev =&p->dst.rt_next;
> -			} else {
> -				*prev = next;
> -				rt_free(p);
> -			}
> -		}
> +				rth->dst.rt_next = list;
> +				list = rth;
> +			} else
> +				pprev =&rth->dst.rt_next;
> +
> +			rth = next;
>   		}
> -#else
> -		rth = rt_hash_table[i].chain;
> -		rt_hash_table[i].chain = NULL;
> -		tail = NULL;
> -#endif
> +
>   		spin_unlock_bh(rt_hash_lock_addr(i));
>
> -		for (; rth != tail; rth = next) {
> -			next = rth->dst.rt_next;
> -			rt_free(rth);
> +		for (; list; list = next) {
> +			next = list->dst.rt_next;
> +			rt_free(list);
>   		}
>   	}
>   }
> @@ -906,13 +893,13 @@ void rt_cache_flush(struct net *net, int delay)
>   {
>   	rt_cache_invalidate(net);
>   	if (delay>= 0)
> -		rt_do_flush(!in_softirq());
> +		rt_do_flush(net, !in_softirq());
>   }
>
>   /* Flush previous cache invalidated entries from the cache */
> -void rt_cache_flush_batch(void)
> +void rt_cache_flush_batch(struct net *net)
>   {
> -	rt_do_flush(!in_softirq());
> +	rt_do_flush(net, !in_softirq());
>   }
>
>   static void rt_emergency_hash_rebuild(struct net *net)
>
>

next prev parent reply	other threads:[~2010-10-13 22:16 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-07  8:48 BUG ? ipip unregister_netdevice_many() Hans Schillstrom
2010-10-08 11:19 ` Daniel Lezcano
2010-10-08 11:53   ` Hans Schillstrom
2010-10-08 12:28     ` Hans Schillstrom
2010-10-08 15:53       ` Daniel Lezcano
2010-10-08 16:17         ` Daniel Lezcano
2010-10-08 16:58           ` Eric W. Biederman
2010-10-08 17:29             ` Daniel Lezcano
2010-10-08 17:47               ` Daniel Lezcano
2010-10-08 16:45       ` Eric W. Biederman
2010-10-08 17:20         ` David Miller
2010-10-08 17:32           ` Eric W. Biederman
2010-10-12 20:05             ` David Miller
2010-10-13 11:19               ` Jarek Poplawski
2010-10-13 21:58                 ` David Miller
2010-10-14  6:41                   ` Hans Schillstrom
2010-10-13 22:16               ` Daniel Lezcano [this message]
2010-10-13 23:23                 ` David Miller
2010-10-14  3:57                   ` Eric Dumazet
2010-10-14 23:28                     ` Paul E. McKenney
2010-10-14  4:40               ` Eric W. Biederman
2010-10-14  4:50                 ` David Miller
2010-10-14  5:20                   ` Eric W. Biederman
2010-10-14 15:09                     ` David Miller
2010-10-14 18:35                       ` Eric W. Biederman
2010-10-08 16:51   ` Eric W. Biederman
2010-10-08 16:06 ` Eric W. Biederman
  -- strict thread matches above, loose matches on Subject: below --
2010-10-14 19:21 Octavian Purdila

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CB62FAF.1030009@free.fr \
    --to=daniel.lezcano@free.fr \
    --cc=davem@davemloft.net \
    --cc=ebiederm@xmission.com \
    --cc=hans.schillstrom@ericsson.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).