Re: BUG ? ipip unregister_netdevice_many()

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Daniel Lezcano <daniel.lezcano@free.fr>
To: David Miller <davem@davemloft.net>
Cc: ebiederm@xmission.com, hans.schillstrom@ericsson.com,
	netdev@vger.kernel.org
Subject: Re: BUG ? ipip unregister_netdevice_many()
Date: Thu, 14 Oct 2010 00:16:15 +0200	[thread overview]
Message-ID: <4CB62FAF.1030009@free.fr> (raw)
In-Reply-To: <20101012.130520.48517464.davem@davemloft.net>

On 10/12/2010 10:05 PM, David Miller wrote:
> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Fri, 08 Oct 2010 10:32:40 -0700
>
>    
>> It is just dealing with not flushing the entire routing cache, just the
>> routes that have expired, which prevents one network namespace from
>> flushing its routes and DoS'ing another.
>>      
> That's a very indirect and obfuscated way of handling it.
>    

I agree.

> And I still don't know why we let the first contiguous set of expired
> entries in the chain get freed outside of the lock, and the rest
> inside the lock.  That really isn't explained by anything I've read.
>
> How about we just do exactly what's intended, and with no ifdefs?
>    
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>

Dave,

would you mind waiting until I have tested the patch before merging it?
I would like to stress this routine a bit with multiple containers.

Thanks
    -- Daniel

> Signed-off-by: David S. Miller<davem@davemloft.net>
>
> diff --git a/include/net/route.h b/include/net/route.h
> index 7e5e73b..8d24761 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -106,7 +106,7 @@ extern int		ip_rt_init(void);
>   extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
>   				       __be32 src, struct net_device *dev);
>   extern void		rt_cache_flush(struct net *net, int how);
> -extern void		rt_cache_flush_batch(void);
> +extern void		rt_cache_flush_batch(struct net *net);
>   extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
>   extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
>   extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 919f2ad..4039f56 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -999,7 +999,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>   		rt_cache_flush(dev_net(dev), 0);
>   		break;
>   	case NETDEV_UNREGISTER_BATCH:
> -		rt_cache_flush_batch();
> +		rt_cache_flush_batch(dev_net(dev));
>   		break;
>   	}
>   	return NOTIFY_DONE;
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 0755aa4..6ad730c 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -712,13 +712,14 @@ static inline int rt_is_expired(struct rtable *rth)
>    * Can be called by a softirq or a process.
>    * In the later case, we want to be reschedule if necessary
>    */
> -static void rt_do_flush(int process_context)
> +static void rt_do_flush(struct net *net, int process_context)
>   {
>   	unsigned int i;
>   	struct rtable *rth, *next;
> -	struct rtable * tail;
>
>   	for (i = 0; i<= rt_hash_mask; i++) {
> +		struct rtable *list, **pprev;
> +
>   		if (process_context&&  need_resched())
>   			cond_resched();
>   		rth = rt_hash_table[i].chain;
> @@ -726,41 +727,27 @@ static void rt_do_flush(int process_context)
>   			continue;
>
>   		spin_lock_bh(rt_hash_lock_addr(i));
> -#ifdef CONFIG_NET_NS
> -		{
> -		struct rtable ** prev, * p;
>
> -		rth = rt_hash_table[i].chain;
> +		pprev =&rt_hash_table[i].chain;
> +		rth = *pprev;
> +		while (rth) {
> +			next = rth->dst.rt_next;
> +			if (dev_net(rth->dst.dev) == net) {
> +				*pprev = next;
>
> -		/* defer releasing the head of the list after spin_unlock */
> -		for (tail = rth; tail; tail = tail->dst.rt_next)
> -			if (!rt_is_expired(tail))
> -				break;
> -		if (rth != tail)
> -			rt_hash_table[i].chain = tail;
> -
> -		/* call rt_free on entries after the tail requiring flush */
> -		prev =&rt_hash_table[i].chain;
> -		for (p = *prev; p; p = next) {
> -			next = p->dst.rt_next;
> -			if (!rt_is_expired(p)) {
> -				prev =&p->dst.rt_next;
> -			} else {
> -				*prev = next;
> -				rt_free(p);
> -			}
> -		}
> +				rth->dst.rt_next = list;
> +				list = rth;
> +			} else
> +				pprev =&rth->dst.rt_next;
> +
> +			rth = next;
>   		}
> -#else
> -		rth = rt_hash_table[i].chain;
> -		rt_hash_table[i].chain = NULL;
> -		tail = NULL;
> -#endif
> +
>   		spin_unlock_bh(rt_hash_lock_addr(i));
>
> -		for (; rth != tail; rth = next) {
> -			next = rth->dst.rt_next;
> -			rt_free(rth);
> +		for (; list; list = next) {
> +			next = list->dst.rt_next;
> +			rt_free(list);
>   		}
>   	}
>   }
> @@ -906,13 +893,13 @@ void rt_cache_flush(struct net *net, int delay)
>   {
>   	rt_cache_invalidate(net);
>   	if (delay>= 0)
> -		rt_do_flush(!in_softirq());
> +		rt_do_flush(net, !in_softirq());
>   }
>
>   /* Flush previous cache invalidated entries from the cache */
> -void rt_cache_flush_batch(void)
> +void rt_cache_flush_batch(struct net *net)
>   {
> -	rt_do_flush(!in_softirq());
> +	rt_do_flush(net, !in_softirq());
>   }
>
>   static void rt_emergency_hash_rebuild(struct net *net)
>
>

next prev parent reply	other threads:[~2010-10-13 22:16 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-07  8:48 BUG ? ipip unregister_netdevice_many() Hans Schillstrom
2010-10-08 11:19 ` Daniel Lezcano
2010-10-08 11:53   ` Hans Schillstrom
2010-10-08 12:28     ` Hans Schillstrom
2010-10-08 15:53       ` Daniel Lezcano
2010-10-08 16:17         ` Daniel Lezcano
2010-10-08 16:58           ` Eric W. Biederman
2010-10-08 17:29             ` Daniel Lezcano
2010-10-08 17:47               ` Daniel Lezcano
2010-10-08 16:45       ` Eric W. Biederman
2010-10-08 17:20         ` David Miller
2010-10-08 17:32           ` Eric W. Biederman
2010-10-12 20:05             ` David Miller
2010-10-13 11:19               ` Jarek Poplawski
2010-10-13 21:58                 ` David Miller
2010-10-14  6:41                   ` Hans Schillstrom
2010-10-13 22:16               ` Daniel Lezcano [this message]
2010-10-13 23:23                 ` David Miller
2010-10-14  3:57                   ` Eric Dumazet
2010-10-14 23:28                     ` Paul E. McKenney
2010-10-14  4:40               ` Eric W. Biederman
2010-10-14  4:50                 ` David Miller
2010-10-14  5:20                   ` Eric W. Biederman
2010-10-14 15:09                     ` David Miller
2010-10-14 18:35                       ` Eric W. Biederman
2010-10-08 16:51   ` Eric W. Biederman
2010-10-08 16:06 ` Eric W. Biederman
  -- strict thread matches above, loose matches on Subject: below --
2010-10-14 19:21 Octavian Purdila

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CB62FAF.1030009@free.fr \
    --to=daniel.lezcano@free.fr \
    --cc=davem@davemloft.net \
    --cc=ebiederm@xmission.com \
    --cc=hans.schillstrom@ericsson.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.