All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jarek Poplawski <jarkao2@o2.pl>
To: Eric Dumazet <dada1@cosmosbay.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	Linux Netdev List <netdev@vger.kernel.org>,
	Andi Kleen <ak@suse.de>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: Re: [PATCH] INET : removes per bucket rwlock in tcp/dccp ehash table
Date: Thu, 01 Nov 2007 17:06:03 +0100	[thread overview]
Message-ID: <4729F96B.8000802@o2.pl> (raw)
In-Reply-To: <4729A774.9030409@cosmosbay.com>

Hi,

A few doubts below: 

Eric Dumazet wrote:
> As done two years ago on IP route cache table (commit 
> 22c047ccbc68fa8f3fa57f0e8f906479a062c426) , we can avoid using one lock per 
> hash bucket for the huge TCP/DCCP hash tables.
...
> diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
> index 4427dcd..5cbfbac 100644
> --- a/include/net/inet_hashtables.h
> +++ b/include/net/inet_hashtables.h
> @@ -37,7 +37,6 @@
>   * I'll experiment with dynamic table growth later.
>   */
>  struct inet_ehash_bucket {
> -	rwlock_t	  lock;
>  	struct hlist_head chain;
>  	struct hlist_head twchain;
>  };
> @@ -91,6 +90,28 @@ struct inet_bind_hashbucket {
>  /* This is for listening sockets, thus all sockets which possess wildcards. */
>  #define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */
>  
> +#if defined(CONFIG_SMP) || defined(CONFIG_PROVE_LOCKING)

Probably "|| defined(CONFIG_DEBUG_SPINLOCK)" is needed here.

> +/*
> + * Instead of using one rwlock for each inet_ehash_bucket, we use a table of locks
> + * The size of this table is a power of two and depends on the number of CPUS.
> + */
> +# if defined(CONFIG_DEBUG_LOCK_ALLOC)
> +#  define EHASH_LOCK_SZ 256
> +# elif NR_CPUS >= 32
> +#  define EHASH_LOCK_SZ	4096
> +# elif NR_CPUS >= 16
> +#  define EHASH_LOCK_SZ	2048
> +# elif NR_CPUS >= 8
> +#  define EHASH_LOCK_SZ	1024
> +# elif NR_CPUS >= 4
> +#  define EHASH_LOCK_SZ	512
> +# else
> +#  define EHASH_LOCK_SZ	256
> +# endif
> +#else
> +# define EHASH_LOCK_SZ 0
> +#endif
> +

Looks hackish: usually DEBUG code checks "real" environment, and here it's
a special case. But omitting locks if no SMP or DEBUG is strange. IMHO,
there should be 1 instead of 0.

>  struct inet_hashinfo {
>  	/* This is for sockets with full identity only.  Sockets here will
>  	 * always be without wildcards and will have the following invariant:
> @@ -100,6 +121,7 @@ struct inet_hashinfo {
>  	 * TIME_WAIT sockets use a separate chain (twchain).
>  	 */
>  	struct inet_ehash_bucket	*ehash;
> +	rwlock_t			*ehash_locks;
>  
>  	/* Ok, let's try this, I give up, we do need a local binding
>  	 * TCP hash as well as the others for fast bind/connect.
> @@ -134,6 +156,13 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
>  	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
>  }
>  
> +static inline rwlock_t *inet_ehash_lockp(
> +	struct inet_hashinfo *hashinfo,
> +	unsigned int hash)
> +{
> +	return &hashinfo->ehash_locks[hash & (EHASH_LOCK_SZ - 1)];
> +}
> +

Is it OK for EHASH_LOCK_SZ == 0?

...
> diff --git a/net/dccp/proto.c b/net/dccp/proto.c
> index d849739..3b5f97a 100644
> --- a/net/dccp/proto.c
> +++ b/net/dccp/proto.c
> @@ -1072,11 +1072,18 @@ static int __init dccp_init(void)
>  	}
>  
>  	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
> -		rwlock_init(&dccp_hashinfo.ehash[i].lock);
>  		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
>  		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
>  	}
> -
> +	if (EHASH_LOCK_SZ) {

Why not #ifdef then? But, IMHO, rwlock_init() should be done at least
once here. (Similarly later for tcp.)

> +		dccp_hashinfo.ehash_locks =
> +			kmalloc(EHASH_LOCK_SZ * sizeof(rwlock_t),
> +				GFP_KERNEL);
> +		if (!dccp_hashinfo.ehash_locks)
> +			goto out_free_dccp_ehash;
> +		for (i = 0; i < EHASH_LOCK_SZ; i++)
> +			rwlock_init(&dccp_hashinfo.ehash_locks[i]);
> +	}
>  	bhash_order = ehash_order;
>  
>  	do {
> @@ -1091,7 +1098,7 @@ static int __init dccp_init(void)
>  
>  	if (!dccp_hashinfo.bhash) {
>  		DCCP_CRIT("Failed to allocate DCCP bind hash table");
> -		goto out_free_dccp_ehash;
> +		goto out_free_dccp_locks;
>  	}
>  
>  	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
> @@ -1121,6 +1128,9 @@ out_free_dccp_mib:
>  out_free_dccp_bhash:
>  	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
>  	dccp_hashinfo.bhash = NULL;
> +out_free_dccp_locks:
> +	kfree(dccp_hashinfo.ehash_locks);
> +	dccp_hashinfo.ehash_locks = NULL;
>  out_free_dccp_ehash:
>  	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
>  	dccp_hashinfo.ehash = NULL;

Isn't such kfree(dccp_hashinfo.ehash_locks) needed in dccp_fini()?

Regards,
Jarek P.

  parent reply	other threads:[~2007-11-01 16:01 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-01 10:16 [PATCH] INET : removes per bucket rwlock in tcp/dccp ehash table Eric Dumazet
2007-11-01 11:03 ` David Miller
2007-11-01 11:20   ` Arnaldo Carvalho de Melo
2007-11-01 11:15 ` Ilpo Järvinen
2007-11-01 16:06 ` Jarek Poplawski [this message]
2007-11-01 18:00   ` Eric Dumazet
2007-11-01 16:14 ` Stephen Hemminger
2007-11-01 17:54   ` Eric Dumazet
2007-11-01 18:48     ` Rick Jones
2007-11-01 19:00       ` Eric Dumazet
2007-11-01 19:17         ` Eric Dumazet
2007-11-01 21:52           ` David Miller
2007-11-01 21:46     ` David Miller
2007-11-03 23:18 ` Andi Kleen
2007-11-03 23:23   ` David Miller
2007-11-04  0:54     ` Andi Kleen
2007-11-04 11:31     ` Eric Dumazet
2007-11-04 12:26       ` Andi Kleen
2007-11-04 13:05         ` Eric Dumazet
2007-11-04 21:56         ` David Miller
2007-11-04 23:01           ` Andi Kleen
2007-11-05  4:24             ` David Miller
2007-11-05  4:35               ` David Miller
2007-11-04 17:58       ` Jarek Poplawski
2007-11-04 18:15         ` Jarek Poplawski
2007-11-04 21:23           ` Eric Dumazet
2007-11-04 23:08             ` Jarek Poplawski
2007-11-07 10:41       ` David Miller
2007-11-07 12:13         ` Jarek Poplawski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4729F96B.8000802@o2.pl \
    --to=jarkao2@o2.pl \
    --cc=acme@redhat.com \
    --cc=ak@suse.de \
    --cc=dada1@cosmosbay.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.