Re: [PATCH next-next-2.6] netdev: better dev_name_hash

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Octavian Purdila <opurdila@ixiacom.com>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: netdev@vger.kernel.org
Subject: Re: [PATCH next-next-2.6] netdev: better dev_name_hash
Date: Sun, 25 Oct 2009 23:55:32 +0200	[thread overview]
Message-ID: <200910252355.32640.opurdila@ixiacom.com> (raw)
In-Reply-To: <4AE4C1FA.7000002@gmail.com>

[-- Attachment #1: Type: Text/Plain, Size: 2271 bytes --]

On Sunday 25 October 2009 23:24:10 you wrote:
> Octavian Purdila a écrit :
> > The current dev_name_hash is not very good at spreading entries when a
> > large number of interfaces of the same type (e.g. ethXXXXX) are used.
> 
> Very true
> 
> > Here are some performance numbers for creating 16000 dummy interfaces
> > with and without the patch (with per device sysctl entries disabled)
> >
> >     With patch                      Without patch
> >
> >     real    0m 2.27s                real    0m 4.32s
> >     user    0m 0.00s                user    0m 0.00s
> >     sys     0m 1.13s                sys     0m 2.16s
> 
> 1) A program to show hash distribution would be more convincing,
> and could show that 17 multiplier is better, not 31 :)
> 

You beat me to it :) 

But anyway, I've attached mine as well, which compares orig, jhash, new17,
and new31 with different numbers of interfaces as well as different hash bits.

The score it shows is the number of iterations needed to find all elements in
the hash (good enough metric?)

My results show that new17 is better than or very close to jhash2. And I think
it's lighter than jhash too.

> 2) Why full_name_hash() not changed as well ?

I don't think it is going to be as easy to "benchmark" this, as it is used 
with more diverse inputs. 

AFAIK, full_name_hash is used by the dentry code, meaning that it caches path
names? If so, perhaps we can use find {/etc,/bin,/sbin,/usr} as input patterns?

My results:

$ ./dev_name_hash ixint 255 0 8
score 1140
$ ./dev_name_hash ixint 255 1 8
score 379
$ ./dev_name_hash ixint 255 2 8
score 461
$ ./dev_name_hash ixint 255 3 8
score 329
$ ./dev_name_hash ixint 1000 0 8
score 26074
$ ./dev_name_hash ixint 1000 1 8
score 2887
$ ./dev_name_hash ixint 1000 2 8
score 2853
$ ./dev_name_hash ixint 1000 3 8
score 3713
$ ./dev_name_hash ixint 16000 0 8
score 3689828
$ ./dev_name_hash ixint 16000 1 8
score 516389
$ ./dev_name_hash ixint 16000 2 8
score 515292
$ ./dev_name_hash ixint 16000 3 8
score 554042
$ ./dev_name_hash ixint 16000 0 16
score 24741
$ ./dev_name_hash ixint 16000 1 16
score 17949
$ ./dev_name_hash ixint 16000 2 16
score 16715
$ ./dev_name_hash ixint 16000 3 16
score 18125

[-- Attachment #2: dev_name_hash.c --]
[-- Type: text/x-csrc, Size: 4994 bytes --]

#define _GNU_SOURCE
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <net/if.h>

/*
 * Kernel-style shorthand used by jhash() below.
 * NOTE(review): u32 is plain unsigned int, so it is only 32 bits wide on
 * platforms where int is 32 bits.
 */
typedef unsigned int u32;
typedef unsigned char u8;

/*
 * Mix the three hash state words a, b and c in place.
 *
 * Each argument must be a modifiable lvalue of an unsigned integer type
 * (jhash() passes u32 variables) and is evaluated several times, so it must
 * not have side effects.  The sequence of subtract/xor/shift steps is order
 * sensitive; do not reorder it.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon behaves as exactly one statement, e.g. inside if/else.
 */
#define __jhash_mix(a, b, c) \
do { \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>13); \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<8); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>13); \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>12);  \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<16); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>5); \
  (a) -= (b); (a) -= (c); (a) ^= ((c)>>3);  \
  (b) -= (c); (b) -= (a); (b) ^= ((a)<<10); \
  (c) -= (a); (c) -= (b); (c) ^= ((b)>>15); \
} while (0)

/* The golden ratio: an arbitrary value, used by jhash() to seed a and b */
#define JHASH_GOLDEN_RATIO	0x9e3779b9

/* The most generic version, hashes an arbitrary sequence
 * of bytes.  No alignment or length assumptions are made about
 * the input key.
 *
 * key:     start of the byte sequence to hash
 * length:  number of bytes to hash
 * initval: value the c state word starts from
 *
 * Returns the final value of the c state word.
 */
static inline u32 jhash(const void *key, u32 length, u32 initval)
{
	u32 a, b, c, len;
	const u8 *k = key;

	len = length;
	a = b = JHASH_GOLDEN_RATIO;
	c = initval;

	/* Consume the key 12 bytes at a time: each state word gets four
	 * bytes assembled least-significant byte first, then all three
	 * words are mixed. */
	while (len >= 12) {
		a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
		b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
		c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));

		__jhash_mix(a,b,c);

		k += 12;
		len -= 12;
	}

	/* The total length goes into c; the tail bytes added to c below are
	 * shifted up by 8 bits (k[8]<<8 and up), which keeps them clear of
	 * c's low byte. */
	c += length;
	/* Fold in the remaining 0..11 bytes.  Every case deliberately falls
	 * through to the next one; len == 0 matches no case. */
	switch (len) {
	case 11: c += ((u32)k[10]<<24);	/* fall through */
	case 10: c += ((u32)k[9]<<16);	/* fall through */
	case 9 : c += ((u32)k[8]<<8);	/* fall through */
	case 8 : b += ((u32)k[7]<<24);	/* fall through */
	case 7 : b += ((u32)k[6]<<16);	/* fall through */
	case 6 : b += ((u32)k[5]<<8);	/* fall through */
	case 5 : b += k[4];		/* fall through */
	case 4 : a += ((u32)k[3]<<24);	/* fall through */
	case 3 : a += ((u32)k[2]<<16);	/* fall through */
	case 2 : a += ((u32)k[1]<<8);	/* fall through */
	case 1 : a += k[0];
	};

	/* Final mix, performed even when there were no tail bytes. */
	__jhash_mix(a,b,c);

	return c;
}


/*
 * Name hashing routines.
 * Hash courtesy of the R5 hash in reiserfs modulo sign bits.
 */
/* Initial hash value: the accumulator full_name_hash() starts from. */
#define init_name_hash()		0

/*
 * Partial hash update: fold one more character into a running name hash.
 * Assumes roughly 4 bits of information per character.
 *
 * c:        the character to add, widened to unsigned long by the caller
 * prevhash: the hash accumulated so far
 *
 * Returns the updated hash; all arithmetic wraps modulo the width of
 * unsigned long.
 */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
	/* spread the character: one copy shifted up, one shifted down */
	const unsigned long spread = (c << 4) + (c >> 4);

	return 11 * (prevhash + spread);
}

/*
 * Finish a name hash: reduce the unsigned long accumulator to the range of
 * an unsigned int by truncation, handing the result back widened to
 * unsigned long again.
 */
static inline unsigned long end_name_hash(unsigned long hash)
{
	const unsigned int truncated = (unsigned int) hash;

	return truncated;
}

/*
 * Compute the hash of the first len characters of name.
 *
 * Starts from init_name_hash(), feeds every character through
 * partial_name_hash() in order and finishes with end_name_hash().
 * name does not have to be NUL-terminated; exactly len characters are read.
 */
static inline unsigned int
full_name_hash(const char *name, unsigned int len)
{
	unsigned long acc = init_name_hash();
	unsigned int pos;

	for (pos = 0; pos < len; pos++)
		acc = partial_name_hash(name[pos], acc);

	return end_name_hash(acc);
}

/*
 * Candidate hash "new17": hash = 17 * hash + character, starting from 0.
 *
 * Only the characters before the first NUL are hashed, and never more than
 * IFNAMSIZ of them.  Characters are plain char, so a byte with the high bit
 * set contributes according to the platform's char signedness.
 */
unsigned int dev_name_hash_new17(const char *name)
{
	unsigned acc = 0;
	int pos;

	/* single pass: stop at the terminator or at the IFNAMSIZ limit */
	for (pos = 0; pos < IFNAMSIZ && name[pos] != '\0'; pos++)
		acc = acc * 17 + name[pos];

	return acc;
}

/*
 * Candidate hash "new31": hash = 31 * hash + character, starting from 0.
 *
 * Only the characters before the first NUL are hashed, and never more than
 * IFNAMSIZ of them.  Characters are plain char, so a byte with the high bit
 * set contributes according to the platform's char signedness.
 */
unsigned int dev_name_hash_new31(const char *name)
{
	unsigned acc = 0;
	int pos;

	/* single pass: stop at the terminator or at the IFNAMSIZ limit */
	for (pos = 0; pos < IFNAMSIZ && name[pos] != '\0'; pos++)
		acc = acc * 31 + name[pos];

	return acc;
}

/*
 * Candidate hash "orig": full_name_hash() over the name, limited to at most
 * IFNAMSIZ characters.
 */
unsigned int dev_name_hash_orig(const char *name)
{
	const size_t len = strnlen(name, IFNAMSIZ);

	return full_name_hash(name, len);
}

/*
 * Candidate hash "jhash": jhash() with an initial value of 0 over the name,
 * limited to at most IFNAMSIZ characters.
 */
unsigned int dev_name_hash_jhash(const char *name)
{
	const size_t len = strnlen(name, IFNAMSIZ);

	return jhash(name, len, 0);
}

/* Signature shared by every candidate device-name hash function. */
typedef unsigned int (*dev_hash_name)(const char *name);

/*
 * Candidate hash functions.  main() indexes this table with the hash_type
 * command-line argument: 0 = orig, 1 = jhash, 2 = new17, 3 = new31.
 */
dev_hash_name hfn[] = {
	dev_name_hash_orig,
	dev_name_hash_jhash,
	dev_name_hash_new17,
	dev_name_hash_new31,
};

/*
 * One hash value and how often it occurs among the generated names.
 * main() keeps one entry per name; the count is only filled in on the first
 * entry that carries a given value.
 */
struct hocs {
	unsigned value;		/* the (masked) hash value */
	int occurences;		/* how many names produced this value */
};

/*
 * Debug tracing.  With "#if 0" debug() expands to nothing; change it to
 * "#if 1" to forward the arguments to printf().  The "x..." form is the
 * GNU named-variadic macro syntax.
 */
#if 0
#define debug(x...) printf(x)
#else
#define debug(x...) 
#endif

/*
 * Benchmark driver for the device-name hash candidates.
 *
 * Usage: dev_name_hash prefix no_of_names hash_type hash_bits
 *
 * Generates the names "<prefix>0" ... "<prefix><no_of_names - 1>", hashes
 * each one with hfn[hash_type], keeps the low hash_bits bits, and prints a
 * score.  Every distinct hash value that occurs n times adds n * (n + 1) / 2
 * to the score, i.e. the number of iterations required to find each element
 * once.
 *
 * Returns 0 on success and -1 on a usage, argument or allocation error.
 */
int main(int argc, char **argv)
{
	/* number of entries in hfn[], not its size in bytes */
	const int nr_hfn = sizeof(hfn) / sizeof(hfn[0]);
	/* a mask wider than the unsigned hash value makes no sense */
	const int max_hbits = 8 * (int)sizeof(unsigned);
	char dev_name[64];
	int no, htype, i, j, hbits;
	long long score;
	unsigned mask;
	unsigned *h;
	/* stores hash elements by value and number of occurences */
	struct hocs *hocs;

	if (argc != 5) {
		fprintf(stderr, "syntax: dev_name_hash prefix no_of_names hash_type hash_bits\n");
		return -1;
	}

	no = atoi(argv[2]);
	if (no <= 0) {
		fprintf(stderr, "invalid number: %d\n", no);
		return -1;
	}

	htype = atoi(argv[3]);
	if (htype < 0 || htype >= nr_hfn) {
		fprintf(stderr, "invalid hash type: %d\n", htype);
		return -1;
	}

	hbits = atoi(argv[4]);
	if (hbits <= 0 || hbits > max_hbits) {
		fprintf(stderr, "invalid hash bits: %d\n", hbits);
		return -1;
	}

	/* shifting by the full width is undefined, so special-case it */
	mask = hbits == max_hbits ? ~0u : (1u << hbits) - 1;

	h = calloc(no, sizeof(*h));
	/* zero every field: occurences must read as 0 for duplicate entries */
	hocs = calloc(no, sizeof(*hocs));
	if (!h || !hocs) {
		fprintf(stderr, "out of memory\n");
		free(hocs);
		free(h);
		return -1;
	}

	for (i = 0; i < no; i++) {
		snprintf(dev_name, sizeof(dev_name), "%s%d", argv[1], i);
		h[i] = hfn[htype](dev_name) & mask;
		debug("h[i] = %x\n", h[i]);
	}

	/* find occurences */
	for (i = 0; i < no; i++) {
		unsigned hash = h[i];
		unsigned dup = 0;

		hocs[i].value = hash;

		/* did we count this value already? */
		for (j = 0; j < i; j++) {
			if (hocs[j].value == hash) {
				dup = 1;
				break;
			}
		}
		if (dup)
			continue;

		/* first sighting: count this name plus every other match */
		hocs[i].occurences = 1;

		for (j = 0; j < no; j++) {
			if (i == j)
				continue;
			if (hash == h[j])
				hocs[i].occurences++;
		}
	}

	/* the score is the number of iterations required to find each element
	   once */
	score = 0;
	for (i = 0; i < no; i++) {
		/* widen before multiplying so large buckets cannot overflow */
		long long occ = hocs[i].occurences;

		debug("hocs[%d] value = %x occurences = %d\n", i, hocs[i].value, hocs[i].occurences);
		score += occ * (occ + 1) / 2;
	}

	printf("score %lld\n", score);

	free(hocs);
	free(h);

	return 0;
}

next prev parent reply	other threads:[~2009-10-25 21:58 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-25 19:58 [PATCH next-next-2.6] netdev: better dev_name_hash Octavian Purdila
2009-10-25 20:17 ` Hagen Paul Pfeifer
2009-10-25 21:24 ` Eric Dumazet
2009-10-25 21:55   ` Octavian Purdila [this message]
2009-10-25 22:41     ` Hagen Paul Pfeifer
2009-10-25 22:45       ` Octavian Purdila
2009-10-26  5:28       ` Eric Dumazet
2009-10-26 13:07         ` Krishna Kumar2
2009-10-26 14:31           ` Octavian Purdila
2009-10-26 14:55             ` Eric Dumazet
2009-10-26 15:52               ` Octavian Purdila
2009-10-26 16:55                 ` Stephen Hemminger
2009-10-26 17:45                   ` Stephen Hemminger
2009-10-27  1:24               ` David Miller
2009-10-27  1:40                 ` Eric Dumazet
2009-10-26  6:30   ` Stephen Hemminger
2009-10-26  7:48     ` Eric Dumazet
2009-10-26  4:43 ` Stephen Hemminger
2009-10-26 22:36   ` [PATCH] dcache: better name hash function Stephen Hemminger <shemminger@vyatta.com>, Al Viro
2009-10-27  2:45     ` Eric Dumazet
2009-10-27  3:53       ` Stephen Hemminger
2009-10-27 16:38       ` Rick Jones

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200910252355.32640.opurdila@ixiacom.com \
    --to=opurdila@ixiacom.com \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).