From: Eric Dumazet <eric.dumazet@gmail.com>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: David Miller <davem@davemloft.net>,
netdev@vger.kernel.org, jamal <hadi@cyberus.ca>,
Daniel Lezcano <dlezcano@fr.ibm.com>
Subject: Re: [PATCH 7/7] net: Batch inet_twsk_purge
Date: Thu, 03 Dec 2009 14:23:21 +0100 [thread overview]
Message-ID: <4B17BBC9.8070106@gmail.com> (raw)
In-Reply-To: <1259843349-3810-7-git-send-email-ebiederm@xmission.com>
Eric W. Biederman a écrit :
> From: Eric W. Biederman <ebiederm@xmission.com>
>
> This function walks the whole hashtable so there is no point in
> passing it a network namespace. Instead I purge all timewait
> sockets from dead network namespaces that I find. If the namespace
> is one of the once I am trying to purge I am guaranteed no new timewait
> sockets can be formed so this will get them all. If the namespace
> is one I am not acting for it might form a few more but I will
> call inet_twsk_purge again and shortly to get rid of them. In
> any even if the network namespace is dead timewait sockets are
> useless.
>
> Move the calls of inet_twsk_purge into batch_exit routines so
> that if I am killing a bunch of namespaces at once I will just
> call inet_twsk_purge once and save a lot of redundant unnecessary
> work.
>
> My simple 4k network namespace exit test the cleanup time dropped from
> roughly 8.2s to 1.6s. While the time spent running inet_twsk_purge fell
> to about 2ms. 1ms for ipv4 and 1ms for ipv6.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
> include/net/inet_timewait_sock.h | 6 +++---
> net/ipv4/inet_timewait_sock.c | 10 +++++-----
> net/ipv4/tcp_ipv4.c | 11 ++++++++---
> net/ipv6/tcp_ipv6.c | 11 ++++++++---
> 4 files changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
> index 773b10f..4fd007f 100644
> --- a/include/net/inet_timewait_sock.h
> +++ b/include/net/inet_timewait_sock.h
> @@ -212,14 +212,14 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
> extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
> struct inet_timewait_death_row *twdr);
>
> -extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
> +extern void inet_twsk_purge(struct inet_hashinfo *hashinfo,
> struct inet_timewait_death_row *twdr, int family);
>
> static inline
> struct net *twsk_net(const struct inet_timewait_sock *twsk)
> {
> #ifdef CONFIG_NET_NS
> - return twsk->tw_net;
> + return rcu_dereference(twsk->tw_net);
> #else
> return &init_net;
> #endif
> @@ -229,7 +229,7 @@ static inline
> void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net)
> {
> #ifdef CONFIG_NET_NS
> - twsk->tw_net = net;
> + rcu_assign_pointer(twsk->tw_net, net);
> #endif
> }
> #endif /* _INET_TIMEWAIT_SOCK_ */
> diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
> index 683ecec..a3699ac 100644
> --- a/net/ipv4/inet_timewait_sock.c
> +++ b/net/ipv4/inet_timewait_sock.c
> @@ -421,7 +421,7 @@ out:
>
> EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
>
> -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
> +void inet_twsk_purge(struct inet_hashinfo *hashinfo,
> struct inet_timewait_death_row *twdr, int family)
> {
> struct inet_timewait_sock *tw;
> @@ -436,15 +436,15 @@ restart_rcu:
> restart:
> sk_nulls_for_each_rcu(sk, node, &head->twchain) {
> tw = inet_twsk(sk);
> - if (!net_eq(twsk_net(tw), net) ||
> - tw->tw_family != family)
> + if ((tw->tw_family != family) ||
> + atomic_read(&twsk_net(tw)->count))
> continue;
>
> if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
> continue;
>
> - if (unlikely(!net_eq(twsk_net(tw), net) ||
> - tw->tw_family != family)) {
> + if (unlikely((tw->tw_family != family) ||
> + atomic_read(&twsk_net(tw)->count))) {
> inet_twsk_put(tw);
> goto restart;
> }
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index df18ce0..e30f026 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -2468,12 +2468,17 @@ static int __net_init tcp_sk_init(struct net *net)
> static void __net_exit tcp_sk_exit(struct net *net)
> {
> inet_ctl_sock_destroy(net->ipv4.tcp_sock);
> - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
> +}
> +
> +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
> +{
> + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
> }
>
> static struct pernet_operations __net_initdata tcp_sk_ops = {
> - .init = tcp_sk_init,
> - .exit = tcp_sk_exit,
> + .init = tcp_sk_init,
> + .exit = tcp_sk_exit,
> + .exit_batch = tcp_sk_exit_batch,
> };
>
> void __init tcp_v4_init(void)
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index de70909..5f46d36 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -2126,12 +2126,17 @@ static int tcpv6_net_init(struct net *net)
> static void tcpv6_net_exit(struct net *net)
> {
> inet_ctl_sock_destroy(net->ipv6.tcp_sk);
> - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
> +}
> +
> +static void tcpv6_net_exit_batch(struct list_head *net_exit_list)
> +{
> + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
> }
>
> static struct pernet_operations tcpv6_net_ops = {
> - .init = tcpv6_net_init,
> - .exit = tcpv6_net_exit,
> + .init = tcpv6_net_init,
> + .exit = tcpv6_net_exit,
> + .exit_batch = tcpv6_net_exit_batch,
> };
>
> int __init tcpv6_init(void)
OK, but why calling inet_twsk_purge() twice, one for AF_INET, once for AF_INET6
I believe you could zap family check as well in inet_twsk_purge(), and not
need tcpv6_net_ops.exit_batch = tcpv6_net_exit_batch
next prev parent reply other threads:[~2009-12-03 13:23 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-03 12:27 [PATCH 0/7] Batched netns improvements Eric W. Biederman
2009-12-03 12:29 ` [PATCH 1/7] net: Add support for batching network namespace cleanups Eric W. Biederman
2009-12-03 12:29 ` [PATCH 2/7] net: Move network device exit batching Eric W. Biederman
2009-12-03 12:29 ` [PATCH 3/7] net: Allow xfrm_user_net_exit to batch efficiently Eric W. Biederman
2009-12-03 12:29 ` [PATCH 4/7] netns: Add an explicit rcu_barrier to unregister_pernet_{device|subsys} Eric W. Biederman
2009-12-03 12:29 ` [PATCH 5/7] net: Allow fib_rule_unregister to batch Eric W. Biederman
2009-12-03 12:29 ` [PATCH 6/7] net: Use rcu lookups in inet_twsk_purge Eric W. Biederman
2009-12-03 13:17 ` Eric Dumazet
2009-12-03 12:29 ` [PATCH 7/7] net: Batch inet_twsk_purge Eric W. Biederman
2009-12-03 13:23 ` Eric Dumazet [this message]
2009-12-03 13:36 ` Eric W. Biederman
2009-12-03 20:24 ` David Miller
2009-12-03 20:45 ` Eric W. Biederman
2009-12-03 13:06 ` [PATCH 0/7] Batched netns improvements jamal
2009-12-03 13:23 ` Eric W. Biederman
2009-12-03 20:24 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4B17BBC9.8070106@gmail.com \
--to=eric.dumazet@gmail.com \
--cc=davem@davemloft.net \
--cc=dlezcano@fr.ibm.com \
--cc=ebiederm@xmission.com \
--cc=hadi@cyberus.ca \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).