From: Eric Dumazet <dada1@cosmosbay.com>
To: Jarek Poplawski <jarkao2@o2.pl>
Cc: "David S. Miller" <davem@davemloft.net>,
Linux Netdev List <netdev@vger.kernel.org>,
Andi Kleen <ak@suse.de>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: Re: [PATCH] INET : removes per bucket rwlock in tcp/dccp ehash table
Date: Thu, 01 Nov 2007 19:00:13 +0100 [thread overview]
Message-ID: <472A142D.4040305@cosmosbay.com> (raw)
In-Reply-To: <4729F96B.8000802@o2.pl>
Jarek Poplawski a écrit :
> Hi,
>
> A few doubts below:
>
>>
>> +#if defined(CONFIG_SMP) || defined(CONFIG_PROVE_LOCKING)
>
> Probably "|| defined(CONFIG_DEBUG_SPINLOCK)" is needed here.
Not sure, because DEBUG_SPINLOCK only applies to spinlocks.
Here we deal with rwlocks.
>
>> +/*
>> + * Instead of using one rwlock for each inet_ehash_bucket, we use a table of locks
>> + * The size of this table is a power of two and depends on the number of CPUS.
>> + */
>> +# if defined(CONFIG_DEBUG_LOCK_ALLOC)
>> +# define EHASH_LOCK_SZ 256
>> +# elif NR_CPUS >= 32
>> +# define EHASH_LOCK_SZ 4096
>> +# elif NR_CPUS >= 16
>> +# define EHASH_LOCK_SZ 2048
>> +# elif NR_CPUS >= 8
>> +# define EHASH_LOCK_SZ 1024
>> +# elif NR_CPUS >= 4
>> +# define EHASH_LOCK_SZ 512
>> +# else
>> +# define EHASH_LOCK_SZ 256
>> +# endif
>> +#else
>> +# define EHASH_LOCK_SZ 0
>> +#endif
>> +
>
> Looks hackish: usually DEBUG code checks "real" environment, and here it's
> a special case. But omitting locks if no SMP or DEBUG is strange. IMHO,
> there should be 1 instead of 0.
It is 0 so that no alloc is done. (see your next questions)
>
>> struct inet_hashinfo {
>> /* This is for sockets with full identity only. Sockets here will
>> * always be without wildcards and will have the following invariant:
>> @@ -100,6 +121,7 @@ struct inet_hashinfo {
>> * TIME_WAIT sockets use a separate chain (twchain).
>> */
>> struct inet_ehash_bucket *ehash;
>> + rwlock_t *ehash_locks;
>>
>> /* Ok, let's try this, I give up, we do need a local binding
>> * TCP hash as well as the others for fast bind/connect.
>> @@ -134,6 +156,13 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
>> return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
>> }
>>
>> +static inline rwlock_t *inet_ehash_lockp(
>> + struct inet_hashinfo *hashinfo,
>> + unsigned int hash)
>> +{
>> + return &hashinfo->ehash_locks[hash & (EHASH_LOCK_SZ - 1)];
>> +}
>> +
>
> Is it OK for EHASH_LOCK_SZ == 0?
At least, compiled tested and booted on UP ;)
>
> ...
>> diff --git a/net/dccp/proto.c b/net/dccp/proto.c
>> index d849739..3b5f97a 100644
>> --- a/net/dccp/proto.c
>> +++ b/net/dccp/proto.c
>> @@ -1072,11 +1072,18 @@ static int __init dccp_init(void)
>> }
>>
>> for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
>> - rwlock_init(&dccp_hashinfo.ehash[i].lock);
>> INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
>> INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
>> }
>> -
>> + if (EHASH_LOCK_SZ) {
>
> Why not #ifdef then? But, IMHO, rwlock_init() should be done at least
> once here. (Similarly later for tcp.)
well, #ifdef are not so nice :)
>
>> + dccp_hashinfo.ehash_locks =
>> + kmalloc(EHASH_LOCK_SZ * sizeof(rwlock_t),
>> + GFP_KERNEL);
>> + if (!dccp_hashinfo.ehash_locks)
>> + goto out_free_dccp_ehash;
>> + for (i = 0; i < EHASH_LOCK_SZ; i++)
>> + rwlock_init(&dccp_hashinfo.ehash_locks[i]);
>> + }
>> bhash_order = ehash_order;
>>
>> do {
>> @@ -1091,7 +1098,7 @@ static int __init dccp_init(void)
>>
>> if (!dccp_hashinfo.bhash) {
>> DCCP_CRIT("Failed to allocate DCCP bind hash table");
>> - goto out_free_dccp_ehash;
>> + goto out_free_dccp_locks;
>> }
>>
>> for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
>> @@ -1121,6 +1128,9 @@ out_free_dccp_mib:
>> out_free_dccp_bhash:
>> free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
>> dccp_hashinfo.bhash = NULL;
>> +out_free_dccp_locks:
>> + kfree(dccp_hashinfo.ehash_locks);
>> + dccp_hashinfo.ehash_locks = NULL;
>> out_free_dccp_ehash:
>> free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
>> dccp_hashinfo.ehash = NULL;
>
> Isn't such kfree(dccp_hashinfo.ehash_locks) needed in dccp_fini()?
>
Probably ! Thank you for reviewing !
Eric
next prev parent reply other threads:[~2007-11-01 18:00 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-11-01 10:16 [PATCH] INET : removes per bucket rwlock in tcp/dccp ehash table Eric Dumazet
2007-11-01 11:03 ` David Miller
2007-11-01 11:20 ` Arnaldo Carvalho de Melo
2007-11-01 11:15 ` Ilpo Järvinen
2007-11-01 16:06 ` Jarek Poplawski
2007-11-01 18:00 ` Eric Dumazet [this message]
2007-11-01 16:14 ` Stephen Hemminger
2007-11-01 17:54 ` Eric Dumazet
2007-11-01 18:48 ` Rick Jones
2007-11-01 19:00 ` Eric Dumazet
2007-11-01 19:17 ` Eric Dumazet
2007-11-01 21:52 ` David Miller
2007-11-01 21:46 ` David Miller
2007-11-03 23:18 ` Andi Kleen
2007-11-03 23:23 ` David Miller
2007-11-04 0:54 ` Andi Kleen
2007-11-04 11:31 ` Eric Dumazet
2007-11-04 12:26 ` Andi Kleen
2007-11-04 13:05 ` Eric Dumazet
2007-11-04 21:56 ` David Miller
2007-11-04 23:01 ` Andi Kleen
2007-11-05 4:24 ` David Miller
2007-11-05 4:35 ` David Miller
2007-11-04 17:58 ` Jarek Poplawski
2007-11-04 18:15 ` Jarek Poplawski
2007-11-04 21:23 ` Eric Dumazet
2007-11-04 23:08 ` Jarek Poplawski
2007-11-07 10:41 ` David Miller
2007-11-07 12:13 ` Jarek Poplawski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=472A142D.4040305@cosmosbay.com \
--to=dada1@cosmosbay.com \
--cc=acme@redhat.com \
--cc=ak@suse.de \
--cc=davem@davemloft.net \
--cc=jarkao2@o2.pl \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).