netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, jamal <hadi@cyberus.ca>,
	Daniel Lezcano <dlezcano@fr.ibm.com>
Subject: Re: [PATCH 7/7] net: Batch inet_twsk_purge
Date: Thu, 03 Dec 2009 14:23:21 +0100	[thread overview]
Message-ID: <4B17BBC9.8070106@gmail.com> (raw)
In-Reply-To: <1259843349-3810-7-git-send-email-ebiederm@xmission.com>

Eric W. Biederman a écrit :
> From: Eric W. Biederman <ebiederm@xmission.com>
> 
> This function walks the whole hashtable so there is no point in
> passing it a network namespace.  Instead I purge all timewait
> sockets from dead network namespaces that I find.  If the namespace
> is one of the ones I am trying to purge I am guaranteed no new timewait
> sockets can be formed so this will get them all.  If the namespace
> is one I am not acting for it might form a few more but I will
> call inet_twsk_purge again shortly to get rid of them.  In
> any event if the network namespace is dead timewait sockets are
> useless.
> 
> Move the calls of inet_twsk_purge into batch_exit routines so
> that if I am killing a bunch of namespaces at once I will just
> call inet_twsk_purge once and save a lot of redundant unnecessary
> work.
> 
> In my simple 4k network namespace exit test the cleanup time dropped from
> roughly 8.2s to 1.6s, while the time spent running inet_twsk_purge fell
> to about 2ms: 1ms for ipv4 and 1ms for ipv6.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  include/net/inet_timewait_sock.h |    6 +++---
>  net/ipv4/inet_timewait_sock.c    |   10 +++++-----
>  net/ipv4/tcp_ipv4.c              |   11 ++++++++---
>  net/ipv6/tcp_ipv6.c              |   11 ++++++++---
>  4 files changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
> index 773b10f..4fd007f 100644
> --- a/include/net/inet_timewait_sock.h
> +++ b/include/net/inet_timewait_sock.h
> @@ -212,14 +212,14 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
>  extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
>  				 struct inet_timewait_death_row *twdr);
>  
> -extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
> +extern void inet_twsk_purge(struct inet_hashinfo *hashinfo,
>  			    struct inet_timewait_death_row *twdr, int family);
>  
>  static inline
>  struct net *twsk_net(const struct inet_timewait_sock *twsk)
>  {
>  #ifdef CONFIG_NET_NS
> -	return twsk->tw_net;
> +	return rcu_dereference(twsk->tw_net);
>  #else
>  	return &init_net;
>  #endif
> @@ -229,7 +229,7 @@ static inline
>  void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net)
>  {
>  #ifdef CONFIG_NET_NS
> -	twsk->tw_net = net;
> +	rcu_assign_pointer(twsk->tw_net, net);
>  #endif
>  }
>  #endif	/* _INET_TIMEWAIT_SOCK_ */
> diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
> index 683ecec..a3699ac 100644
> --- a/net/ipv4/inet_timewait_sock.c
> +++ b/net/ipv4/inet_timewait_sock.c
> @@ -421,7 +421,7 @@ out:
>  
>  EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
>  
> -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
> +void inet_twsk_purge(struct inet_hashinfo *hashinfo,
>  		     struct inet_timewait_death_row *twdr, int family)
>  {
>  	struct inet_timewait_sock *tw;
> @@ -436,15 +436,15 @@ restart_rcu:
>  restart:
>  		sk_nulls_for_each_rcu(sk, node, &head->twchain) {
>  			tw = inet_twsk(sk);
> -			if (!net_eq(twsk_net(tw), net) ||
> -			    tw->tw_family != family)
> +			if ((tw->tw_family != family) ||
> +				atomic_read(&twsk_net(tw)->count))
>  				continue;
>  
>  			if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
>  				continue;
>  
> -			if (unlikely(!net_eq(twsk_net(tw), net) ||
> -				     tw->tw_family != family)) {
> +			if (unlikely((tw->tw_family != family) ||
> +				     atomic_read(&twsk_net(tw)->count))) {
>  				inet_twsk_put(tw);
>  				goto restart;
>  			}
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index df18ce0..e30f026 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -2468,12 +2468,17 @@ static int __net_init tcp_sk_init(struct net *net)
>  static void __net_exit tcp_sk_exit(struct net *net)
>  {
>  	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
> -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
> +}
> +
> +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
> +{
> +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
>  }
>  
>  static struct pernet_operations __net_initdata tcp_sk_ops = {
> -       .init = tcp_sk_init,
> -       .exit = tcp_sk_exit,
> +       .init	   = tcp_sk_init,
> +       .exit	   = tcp_sk_exit,
> +       .exit_batch = tcp_sk_exit_batch,
>  };
>  
>  void __init tcp_v4_init(void)
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index de70909..5f46d36 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2126,12 +2126,17 @@ static int tcpv6_net_init(struct net *net)
>  static void tcpv6_net_exit(struct net *net)
>  {
>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
> +}
> +
> +static void tcpv6_net_exit_batch(struct list_head *net_exit_list)
> +{
> +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
>  }
>  
>  static struct pernet_operations tcpv6_net_ops = {
> -	.init = tcpv6_net_init,
> -	.exit = tcpv6_net_exit,
> +	.init	    = tcpv6_net_init,
> +	.exit	    = tcpv6_net_exit,
> +	.exit_batch = tcpv6_net_exit_batch,
>  };
>  
>  int __init tcpv6_init(void)


OK, but why call inet_twsk_purge() twice, once for AF_INET and once for AF_INET6?

I believe you could zap the family check as well in inet_twsk_purge(), and then
not need tcpv6_net_ops.exit_batch = tcpv6_net_exit_batch.


  reply	other threads:[~2009-12-03 13:23 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-03 12:27 [PATCH 0/7] Batched netns improvements Eric W. Biederman
2009-12-03 12:29 ` [PATCH 1/7] net: Add support for batching network namespace cleanups Eric W. Biederman
2009-12-03 12:29 ` [PATCH 2/7] net: Move network device exit batching Eric W. Biederman
2009-12-03 12:29 ` [PATCH 3/7] net: Allow xfrm_user_net_exit to batch efficiently Eric W. Biederman
2009-12-03 12:29 ` [PATCH 4/7] netns: Add an explicit rcu_barrier to unregister_pernet_{device|subsys} Eric W. Biederman
2009-12-03 12:29 ` [PATCH 5/7] net: Allow fib_rule_unregister to batch Eric W. Biederman
2009-12-03 12:29 ` [PATCH 6/7] net: Use rcu lookups in inet_twsk_purge Eric W. Biederman
2009-12-03 13:17   ` Eric Dumazet
2009-12-03 12:29 ` [PATCH 7/7] net: Batch inet_twsk_purge Eric W. Biederman
2009-12-03 13:23   ` Eric Dumazet [this message]
2009-12-03 13:36     ` Eric W. Biederman
2009-12-03 20:24       ` David Miller
2009-12-03 20:45         ` Eric W. Biederman
2009-12-03 13:06 ` [PATCH 0/7] Batched netns improvements jamal
2009-12-03 13:23   ` Eric W. Biederman
2009-12-03 20:24 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B17BBC9.8070106@gmail.com \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dlezcano@fr.ibm.com \
    --cc=ebiederm@xmission.com \
    --cc=hadi@cyberus.ca \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).