netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jarek Poplawski <jarkao2@o2.pl>
To: Eric Dumazet <dada1@cosmosbay.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	Linux Netdev List <netdev@vger.kernel.org>,
	Andi Kleen <ak@suse.de>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: Re: [PATCH] INET : removes per bucket rwlock in tcp/dccp ehash table
Date: Thu, 01 Nov 2007 17:06:03 +0100	[thread overview]
Message-ID: <4729F96B.8000802@o2.pl> (raw)
In-Reply-To: <4729A774.9030409@cosmosbay.com>

Hi,

A few doubts below: 

Eric Dumazet wrote:
> As done two years ago on IP route cache table (commit 
> 22c047ccbc68fa8f3fa57f0e8f906479a062c426) , we can avoid using one lock per 
> hash bucket for the huge TCP/DCCP hash tables.
...
> diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
> index 4427dcd..5cbfbac 100644
> --- a/include/net/inet_hashtables.h
> +++ b/include/net/inet_hashtables.h
> @@ -37,7 +37,6 @@
>   * I'll experiment with dynamic table growth later.
>   */
>  struct inet_ehash_bucket {
> -	rwlock_t	  lock;
>  	struct hlist_head chain;
>  	struct hlist_head twchain;
>  };
> @@ -91,6 +90,28 @@ struct inet_bind_hashbucket {
>  /* This is for listening sockets, thus all sockets which possess wildcards. */
>  #define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */
>  
> +#if defined(CONFIG_SMP) || defined(CONFIG_PROVE_LOCKING)

Probably "|| defined(CONFIG_DEBUG_SPINLOCK)" is needed here.

> +/*
> + * Instead of using one rwlock for each inet_ehash_bucket, we use a table of locks
> + * The size of this table is a power of two and depends on the number of CPUS.
> + */
> +# if defined(CONFIG_DEBUG_LOCK_ALLOC)
> +#  define EHASH_LOCK_SZ 256
> +# elif NR_CPUS >= 32
> +#  define EHASH_LOCK_SZ	4096
> +# elif NR_CPUS >= 16
> +#  define EHASH_LOCK_SZ	2048
> +# elif NR_CPUS >= 8
> +#  define EHASH_LOCK_SZ	1024
> +# elif NR_CPUS >= 4
> +#  define EHASH_LOCK_SZ	512
> +# else
> +#  define EHASH_LOCK_SZ	256
> +# endif
> +#else
> +# define EHASH_LOCK_SZ 0
> +#endif
> +

Looks hackish: usually DEBUG code checks "real" environment, and here it's
a special case. But omitting locks if no SMP or DEBUG is strange. IMHO,
there should be 1 instead of 0.

>  struct inet_hashinfo {
>  	/* This is for sockets with full identity only.  Sockets here will
>  	 * always be without wildcards and will have the following invariant:
> @@ -100,6 +121,7 @@ struct inet_hashinfo {
>  	 * TIME_WAIT sockets use a separate chain (twchain).
>  	 */
>  	struct inet_ehash_bucket	*ehash;
> +	rwlock_t			*ehash_locks;
>  
>  	/* Ok, let's try this, I give up, we do need a local binding
>  	 * TCP hash as well as the others for fast bind/connect.
> @@ -134,6 +156,13 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
>  	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
>  }
>  
> +static inline rwlock_t *inet_ehash_lockp(
> +	struct inet_hashinfo *hashinfo,
> +	unsigned int hash)
> +{
> +	return &hashinfo->ehash_locks[hash & (EHASH_LOCK_SZ - 1)];
> +}
> +

Is it OK for EHASH_LOCK_SZ == 0?

...
> diff --git a/net/dccp/proto.c b/net/dccp/proto.c
> index d849739..3b5f97a 100644
> --- a/net/dccp/proto.c
> +++ b/net/dccp/proto.c
> @@ -1072,11 +1072,18 @@ static int __init dccp_init(void)
>  	}
>  
>  	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
> -		rwlock_init(&dccp_hashinfo.ehash[i].lock);
>  		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
>  		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
>  	}
> -
> +	if (EHASH_LOCK_SZ) {

Why not #ifdef then? But, IMHO, rwlock_init() should be done at least
once here. (Similarly later for tcp.)

> +		dccp_hashinfo.ehash_locks =
> +			kmalloc(EHASH_LOCK_SZ * sizeof(rwlock_t),
> +				GFP_KERNEL);
> +		if (!dccp_hashinfo.ehash_locks)
> +			goto out_free_dccp_ehash;
> +		for (i = 0; i < EHASH_LOCK_SZ; i++)
> +			rwlock_init(&dccp_hashinfo.ehash_locks[i]);
> +	}
>  	bhash_order = ehash_order;
>  
>  	do {
> @@ -1091,7 +1098,7 @@ static int __init dccp_init(void)
>  
>  	if (!dccp_hashinfo.bhash) {
>  		DCCP_CRIT("Failed to allocate DCCP bind hash table");
> -		goto out_free_dccp_ehash;
> +		goto out_free_dccp_locks;
>  	}
>  
>  	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
> @@ -1121,6 +1128,9 @@ out_free_dccp_mib:
>  out_free_dccp_bhash:
>  	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
>  	dccp_hashinfo.bhash = NULL;
> +out_free_dccp_locks:
> +	kfree(dccp_hashinfo.ehash_locks);
> +	dccp_hashinfo.ehash_locks = NULL;
>  out_free_dccp_ehash:
>  	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
>  	dccp_hashinfo.ehash = NULL;

Isn't such kfree(dccp_hashinfo.ehash_locks) needed in dccp_fini()?

Regards,
Jarek P.

  parent reply	other threads:[~2007-11-01 16:01 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-01 10:16 [PATCH] INET : removes per bucket rwlock in tcp/dccp ehash table Eric Dumazet
2007-11-01 11:03 ` David Miller
2007-11-01 11:20   ` Arnaldo Carvalho de Melo
2007-11-01 11:15 ` Ilpo Järvinen
2007-11-01 16:06 ` Jarek Poplawski [this message]
2007-11-01 18:00   ` Eric Dumazet
2007-11-01 16:14 ` Stephen Hemminger
2007-11-01 17:54   ` Eric Dumazet
2007-11-01 18:48     ` Rick Jones
2007-11-01 19:00       ` Eric Dumazet
2007-11-01 19:17         ` Eric Dumazet
2007-11-01 21:52           ` David Miller
2007-11-01 21:46     ` David Miller
2007-11-03 23:18 ` Andi Kleen
2007-11-03 23:23   ` David Miller
2007-11-04  0:54     ` Andi Kleen
2007-11-04 11:31     ` Eric Dumazet
2007-11-04 12:26       ` Andi Kleen
2007-11-04 13:05         ` Eric Dumazet
2007-11-04 21:56         ` David Miller
2007-11-04 23:01           ` Andi Kleen
2007-11-05  4:24             ` David Miller
2007-11-05  4:35               ` David Miller
2007-11-04 17:58       ` Jarek Poplawski
2007-11-04 18:15         ` Jarek Poplawski
2007-11-04 21:23           ` Eric Dumazet
2007-11-04 23:08             ` Jarek Poplawski
2007-11-07 10:41       ` David Miller
2007-11-07 12:13         ` Jarek Poplawski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4729F96B.8000802@o2.pl \
    --to=jarkao2@o2.pl \
    --cc=acme@redhat.com \
    --cc=ak@suse.de \
    --cc=dada1@cosmosbay.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).