From: Stephen Hemminger <shemminger@vyatta.com>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Octavian Purdila <opurdila@ixiacom.com>, netdev@vger.kernel.org
Subject: Re: [PATCH next-next-2.6] netdev: better dev_name_hash
Date: Sun, 25 Oct 2009 23:30:16 -0700 [thread overview]
Message-ID: <20091025233016.6860d9c7@nehalam> (raw)
In-Reply-To: <4AE4C1FA.7000002@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1536 bytes --]
I overkilled this with more functions and compared filenames as well.
genarated names (dummyNNNN)
Algorithm Time (us) Ratio Max StdDev
kr_hash 277925 152408.6 468448 543.19
string_hash31 329356 5859.4 16042 44.18
SuperFastHash 324570 4885.9 10502 2.29
djb2 327908 5608.5 15210 38.08
string_hash17 326769 4883.6 9896 0.76
full_name_hash 343196 63921.0 140628 343.62
jhash_string 463801 4883.8 10085 1.02
sdbm 398587 9801.7 29634 99.18
filesystem names
Algorithm Time (us) Ratio Max StdDev
kr_hash 278840 152314.9 468717 543.01
string_hash31 331206 5802.1 16004 42.87
SuperFastHash 325938 4887.5 13528 2.88
djb2 330621 5607.1 15333 38.05
string_hash17 331181 4884.9 13274 1.78
full_name_hash 347312 63972.2 141336 343.77
jhash_string 466799 4885.2 13275 1.92
sdbm 403691 9771.7 29629 98.88
Ratio is the average number of buckets examined when scanning
the whole set of names.
1) Increased hash buckets to 1024 which seems reasonable if we are
going to test that many names.
2) Increased name size to 256 so that longer filenames could be
checked and name blocks were not in same cache line
* SuperFastHash is too big to put inline
[-- Attachment #2: hashtest.c --]
[-- Type: text/x-c++src, Size: 6872 bytes --]
#include <stdio.h>
#include <stdint.h>
#include <malloc.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
static unsigned int
kr_hash(const unsigned char *str, unsigned int len)
{
unsigned int hash = 0;
while (len--)
hash += *str++;
return hash;
}
static unsigned int
sdbm(const unsigned char *name, unsigned int len)
{
unsigned long hash = 0;
while (len--)
hash = *name++ + (hash << 6) + (hash << 16) - hash;
return hash;
}
#define init_name_hash() 0
/* partial hash update function. Assume roughly 4 bits per character */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
return (prevhash + (c << 4) + (c >> 4)) * 11;
}
/* Compute the hash for a name string. */
static unsigned int
full_name_hash(const unsigned char *name, unsigned int len)
{
unsigned long hash = 0;
while (len--)
hash = partial_name_hash(*name++, hash);
return hash;
}
static unsigned int
djb2(const unsigned char *str, unsigned int len)
{
unsigned long hash = 5381;
while (len--)
hash = ((hash << 5) + hash) + *str++; /* hash * 33 + c */
return hash;
}
static unsigned int
string_hash31(const unsigned char *name, unsigned int len)
{
unsigned hash = 0;
int i;
for (i = 0; i < len; ++i)
hash = 31 * hash + name[i];
return hash;
}
static unsigned int
string_hash17(const unsigned char *name, unsigned int len)
{
unsigned hash = 0;
int i;
for (i = 0; i < len; ++i)
hash = 17 * hash + name[i];
return hash;
}
/* will need to change to unaligned_access() */
static inline uint16_t get16bits(const unsigned char *data)
{
return *(uint16_t *) data;
}
static inline unsigned int
SuperFastHash (const unsigned char * data, unsigned int len)
{
uint32_t hash = len, tmp;
int rem;
rem = len & 3;
len >>= 2;
/* Main loop */
for (;len > 0; len--) {
hash += get16bits (data);
tmp = (get16bits (data+2) << 11) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof (uint16_t);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += get16bits (data);
hash ^= hash << 16;
hash ^= data[sizeof (uint16_t)] << 18;
hash += hash >> 11;
break;
case 2: hash += get16bits (data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += *data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
/* NOTE: Arguments are modified. */
#define __jhash_mix(a, b, c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
/* The golden ration: an arbitrary value */
#define JHASH_GOLDEN_RATIO 0x9e3779b9
/* The most generic version, hashes an arbitrary sequence
* of bytes. No alignment or length assumptions are made about
* the input key.
*/
static inline uint32_t jhash(const void *key, uint32_t length, uint32_t initval)
{
uint32_t a, b, c, len;
const uint8_t *k = key;
len = length;
a = b = JHASH_GOLDEN_RATIO;
c = initval;
while (len >= 12) {
a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
__jhash_mix(a,b,c);
k += 12;
len -= 12;
}
c += length;
switch (len) {
case 11: c += ((uint32_t)k[10]<<24);
case 10: c += ((uint32_t)k[9]<<16);
case 9 : c += ((uint32_t)k[8]<<8);
case 8 : b += ((uint32_t)k[7]<<24);
case 7 : b += ((uint32_t)k[6]<<16);
case 6 : b += ((uint32_t)k[5]<<8);
case 5 : b += k[4];
case 4 : a += ((uint32_t)k[3]<<24);
case 3 : a += ((uint32_t)k[2]<<16);
case 2 : a += ((uint32_t)k[1]<<8);
case 1 : a += k[0];
};
__jhash_mix(a,b,c);
return c;
}
static unsigned int
jhash_string(const unsigned char *name, unsigned int len)
{
return jhash(name, len, 0);
}
#define TESTSIZE 10000000ul
#define HASHBITS 10
#define HASHSZ (1<<HASHBITS)
#define HASHMSK (HASHSZ-1)
#define IFNAMSIZ 256
static char *names[TESTSIZE];
static void generate_names(void)
{
unsigned int i;
char *buf = malloc(TESTSIZE * IFNAMSIZ);
printf("\ngenarated names (dummyNNNN)\n");
for (i = 0; i < TESTSIZE; i++) {
snprintf(buf, IFNAMSIZ, "dummy%d", i);
names[i] = buf;
buf += IFNAMSIZ;
}
}
static inline void measure(const char *name,
unsigned int (*hash)(const unsigned char *, unsigned int))
{
unsigned int i;
struct timeval t0, t1;
unsigned int dist[HASHSZ] = { 0 };
unsigned long long ratio = 0;
long us;
unsigned long m = 0;
const double avg = TESTSIZE / HASHSZ;
double std = 0;
gettimeofday(&t0, NULL);
for (i = 0; i < TESTSIZE; i++) {
const unsigned char *str = (const unsigned char *) names[i];
unsigned int h = hash(str, strlen(names[i]));
++dist[h & HASHMSK];
}
gettimeofday(&t1, NULL);
us = (t1.tv_sec - t0.tv_sec) * 1000000;
us += (t1.tv_usec - t0.tv_usec);
for (i = 0; i < HASHSZ; i++) {
unsigned int n = dist[i];
unsigned long long s;
double delta = (n - avg);
s = n;
s *= (1 + n);
ratio += s / 2;
if (n > m) m = n;
std += delta * delta;
}
printf("%-20s %10ld %10.1f %8ld %6.2f\n", name, us,
(double) ratio / (double)TESTSIZE, m, sqrt(std / TESTSIZE));
}
#define MEASURE(func) measure(#func, &func)
static void filesystem_names(void)
{
FILE *f = fopen("/tmp/names", "r");
if (!f) { perror("/tmp/names"); exit(1); }
unsigned int i = 0;
char line[IFNAMSIZ];
printf("\nfilesystem names\n");
while (fgets(line, sizeof line, f) != NULL) {
strncpy(names[i], line, IFNAMSIZ);
if (++i == TESTSIZE)
break;
}
fclose(f);
}
int main()
{
generate_names();
printf("Algorithm Time (us) Ratio Max StdDev\n");
MEASURE(kr_hash);
MEASURE(string_hash31);
MEASURE(SuperFastHash);
MEASURE(djb2);
MEASURE(string_hash17);
MEASURE(full_name_hash);
MEASURE(jhash_string);
MEASURE(sdbm);
filesystem_names();
printf("Algorithm Time (us) Ratio Max StdDev\n");
MEASURE(kr_hash);
MEASURE(string_hash31);
MEASURE(SuperFastHash);
MEASURE(djb2);
MEASURE(string_hash17);
MEASURE(full_name_hash);
MEASURE(jhash_string);
MEASURE(sdbm);
return 0;
}
next prev parent reply other threads:[~2009-10-26 6:30 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-25 19:58 [PATCH next-next-2.6] netdev: better dev_name_hash Octavian Purdila
2009-10-25 20:17 ` Hagen Paul Pfeifer
2009-10-25 21:24 ` Eric Dumazet
2009-10-25 21:55 ` Octavian Purdila
2009-10-25 22:41 ` Hagen Paul Pfeifer
2009-10-25 22:45 ` Octavian Purdila
2009-10-26 5:28 ` Eric Dumazet
2009-10-26 13:07 ` Krishna Kumar2
2009-10-26 14:31 ` Octavian Purdila
2009-10-26 14:55 ` Eric Dumazet
2009-10-26 15:52 ` Octavian Purdila
2009-10-26 16:55 ` Stephen Hemminger
2009-10-26 17:45 ` Stephen Hemminger
2009-10-27 1:24 ` David Miller
2009-10-27 1:40 ` Eric Dumazet
2009-10-26 6:30 ` Stephen Hemminger [this message]
2009-10-26 7:48 ` Eric Dumazet
2009-10-26 4:43 ` Stephen Hemminger
2009-10-26 22:36 ` [PATCH] dcache: better name hash function Stephen Hemminger <shemminger@vyatta.com>, Al Viro
2009-10-27 2:45 ` Eric Dumazet
2009-10-27 3:53 ` Stephen Hemminger
2009-10-27 16:38 ` Rick Jones
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091025233016.6860d9c7@nehalam \
--to=shemminger@vyatta.com \
--cc=eric.dumazet@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=opurdila@ixiacom.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).