netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, jamal <hadi@cyberus.ca>,
	Daniel Lezcano <dlezcano@fr.ibm.com>
Subject: Re: [PATCH 7/7] net: Batch inet_twsk_purge
Date: Thu, 03 Dec 2009 05:36:52 -0800	[thread overview]
Message-ID: <m1ws14kwln.fsf@fess.ebiederm.org> (raw)
In-Reply-To: <4B17BBC9.8070106@gmail.com> (Eric Dumazet's message of "Thu\, 03 Dec 2009 14\:23\:21 +0100")

Eric Dumazet <eric.dumazet@gmail.com> writes:

> Eric W. Biederman a écrit :
>> From: Eric W. Biederman <ebiederm@xmission.com>
>> 
>> This function walks the whole hashtable so there is no point in
>> passing it a network namespace.  Instead I purge all timewait
>> sockets from dead network namespaces that I find.  If the namespace
>> is one of the once I am trying to purge I am guaranteed no new timewait
>> sockets can be formed so this will get them all.  If the namespace
>> is one I am not acting for it might form a few more but I will
>> call inet_twsk_purge again and  shortly to get rid of them.  In
>> any even if the network namespace is dead timewait sockets are
>> useless.
>> 
>> Move the calls of inet_twsk_purge into batch_exit routines so
>> that if I am killing a bunch of namespaces at once I will just
>> call inet_twsk_purge once and save a lot of redundant unnecessary
>> work.
>> 
>> My simple 4k network namespace exit test the cleanup time dropped from
>> roughly 8.2s to 1.6s.  While the time spent running inet_twsk_purge fell
>> to about 2ms.  1ms for ipv4 and 1ms for ipv6.
>> 
>> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>> ---
>>  include/net/inet_timewait_sock.h |    6 +++---
>>  net/ipv4/inet_timewait_sock.c    |   10 +++++-----
>>  net/ipv4/tcp_ipv4.c              |   11 ++++++++---
>>  net/ipv6/tcp_ipv6.c              |   11 ++++++++---
>>  4 files changed, 24 insertions(+), 14 deletions(-)
>> 
>> diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
>> index 773b10f..4fd007f 100644
>> --- a/include/net/inet_timewait_sock.h
>> +++ b/include/net/inet_timewait_sock.h
>> @@ -212,14 +212,14 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
>>  extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
>>  				 struct inet_timewait_death_row *twdr);
>>  
>> -extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
>> +extern void inet_twsk_purge(struct inet_hashinfo *hashinfo,
>>  			    struct inet_timewait_death_row *twdr, int family);
>>  
>>  static inline
>>  struct net *twsk_net(const struct inet_timewait_sock *twsk)
>>  {
>>  #ifdef CONFIG_NET_NS
>> -	return twsk->tw_net;
>> +	return rcu_dereference(twsk->tw_net);
>>  #else
>>  	return &init_net;
>>  #endif
>> @@ -229,7 +229,7 @@ static inline
>>  void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net)
>>  {
>>  #ifdef CONFIG_NET_NS
>> -	twsk->tw_net = net;
>> +	rcu_assign_pointer(twsk->tw_net, net);
>>  #endif
>>  }
>>  #endif	/* _INET_TIMEWAIT_SOCK_ */
>> diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
>> index 683ecec..a3699ac 100644
>> --- a/net/ipv4/inet_timewait_sock.c
>> +++ b/net/ipv4/inet_timewait_sock.c
>> @@ -421,7 +421,7 @@ out:
>>  
>>  EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
>>  
>> -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
>> +void inet_twsk_purge(struct inet_hashinfo *hashinfo,
>>  		     struct inet_timewait_death_row *twdr, int family)
>>  {
>>  	struct inet_timewait_sock *tw;
>> @@ -436,15 +436,15 @@ restart_rcu:
>>  restart:
>>  		sk_nulls_for_each_rcu(sk, node, &head->twchain) {
>>  			tw = inet_twsk(sk);
>> -			if (!net_eq(twsk_net(tw), net) ||
>> -			    tw->tw_family != family)
>> +			if ((tw->tw_family != family) ||
>> +				atomic_read(&twsk_net(tw)->count))
>>  				continue;
>>  
>>  			if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
>>  				continue;
>>  
>> -			if (unlikely(!net_eq(twsk_net(tw), net) ||
>> -				     tw->tw_family != family)) {
>> +			if (unlikely((tw->tw_family != family) ||
>> +				     atomic_read(&twsk_net(tw)->count))) {
>>  				inet_twsk_put(tw);
>>  				goto restart;
>>  			}
>> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
>> index df18ce0..e30f026 100644
>> --- a/net/ipv4/tcp_ipv4.c
>> +++ b/net/ipv4/tcp_ipv4.c
>> @@ -2468,12 +2468,17 @@ static int __net_init tcp_sk_init(struct net *net)
>>  static void __net_exit tcp_sk_exit(struct net *net)
>>  {
>>  	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
>> -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
>> +}
>> +
>> +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
>> +{
>> +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
>>  }
>>  
>>  static struct pernet_operations __net_initdata tcp_sk_ops = {
>> -       .init = tcp_sk_init,
>> -       .exit = tcp_sk_exit,
>> +       .init	   = tcp_sk_init,
>> +       .exit	   = tcp_sk_exit,
>> +       .exit_batch = tcp_sk_exit_batch,
>>  };
>>  
>>  void __init tcp_v4_init(void)
>> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
>> index de70909..5f46d36 100644
>> --- a/net/ipv6/tcp_ipv6.c
>> +++ b/net/ipv6/tcp_ipv6.c
>> @@ -2126,12 +2126,17 @@ static int tcpv6_net_init(struct net *net)
>>  static void tcpv6_net_exit(struct net *net)
>>  {
>>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
>> -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
>> +}
>> +
>> +static void tcpv6_net_exit_batch(struct list_head *net_exit_list)
>> +{
>> +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
>>  }
>>  
>>  static struct pernet_operations tcpv6_net_ops = {
>> -	.init = tcpv6_net_init,
>> -	.exit = tcpv6_net_exit,
>> +	.init	    = tcpv6_net_init,
>> +	.exit	    = tcpv6_net_exit,
>> +	.exit_batch = tcpv6_net_exit_batch,
>>  };
>>  
>>  int __init tcpv6_init(void)
>
>
> OK, but why calling inet_twsk_purge() twice, one for AF_INET, once for AF_INET6
>
> I believe you could zap family check as well in inet_twsk_purge(), and not
> need tcpv6_net_ops.exit_batch = tcpv6_net_exit_batch

Technically it is needed if someone does rmmod ipv6.  rmmod ipv6 didn't seem
to work for me, but if it ever does...  That and the cost is now in the noise
in human terms.

I think dccp may also need a inet_twsk_purge as well, but I couldn't figure out what
it was doing with timewait sockets.

Eric



  reply	other threads:[~2009-12-03 13:36 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-03 12:27 [PATCH 0/7] Batched netns improvements Eric W. Biederman
2009-12-03 12:29 ` [PATCH 1/7] net: Add support for batching network namespace cleanups Eric W. Biederman
2009-12-03 12:29 ` [PATCH 2/7] net: Move network device exit batching Eric W. Biederman
2009-12-03 12:29 ` [PATCH 3/7] net: Allow xfrm_user_net_exit to batch efficiently Eric W. Biederman
2009-12-03 12:29 ` [PATCH 4/7] netns: Add an explicit rcu_barrier to unregister_pernet_{device|subsys} Eric W. Biederman
2009-12-03 12:29 ` [PATCH 5/7] net: Allow fib_rule_unregister to batch Eric W. Biederman
2009-12-03 12:29 ` [PATCH 6/7] net: Use rcu lookups in inet_twsk_purge Eric W. Biederman
2009-12-03 13:17   ` Eric Dumazet
2009-12-03 12:29 ` [PATCH 7/7] net: Batch inet_twsk_purge Eric W. Biederman
2009-12-03 13:23   ` Eric Dumazet
2009-12-03 13:36     ` Eric W. Biederman [this message]
2009-12-03 20:24       ` David Miller
2009-12-03 20:45         ` Eric W. Biederman
2009-12-03 13:06 ` [PATCH 0/7] Batched netns improvements jamal
2009-12-03 13:23   ` Eric W. Biederman
2009-12-03 20:24 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m1ws14kwln.fsf@fess.ebiederm.org \
    --to=ebiederm@xmission.com \
    --cc=davem@davemloft.net \
    --cc=dlezcano@fr.ibm.com \
    --cc=eric.dumazet@gmail.com \
    --cc=hadi@cyberus.ca \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).