From: "Theodore Ts'o" <tytso@mit.edu>
To: George Spelvin <linux@horizon.com>
Cc: hpa@linux.intel.com, linux-kernel@vger.kernel.org,
mingo@kernel.org, price@mit.edu
Subject: Re: random: Benchamrking fast_mix2
Date: Sat, 14 Jun 2014 21:17:13 -0400 [thread overview]
Message-ID: <20140615011713.GK6447@thunk.org> (raw)
In-Reply-To: <20140615002333.24750.qmail@ns.horizon.com>
On Sat, Jun 14, 2014 at 08:23:33PM -0400, George Spelvin wrote:
> The example I posted:
>
> // (29/66353) score = 49/121/123: 6 27 16 14
>
> a += b; c += d;
> b = rol32(a, 6); d = rol32(c, 27);
> d ^= a; b ^= c;
>
> a += b; c += d;
> b = rol32(a, 16); d = rol32(c, 14);
> d ^= a; b ^= c;
>
> has, after 2 rounds, a minimum avalanche of 49 bits, taken over all of
> the variables just mentioned. The only thing maximized over is the
> different starting values.
I'm seeing a minimum delta of 40 bits, actually. Which makes it
slightly better than your original fast_mix2 (which had a minimum
delta of 39) when using 1024 random samples using random(3) to
generate a starting pool and setting a single bit in each possible bit
position in the input array. So it's slightly better, and as I
mentioned, on my CPU, I'm really not seeing that much difference
between fast_mix2() and fast_mix3().
But I'm willing to go with this as being quite sufficient as a mixing
function.
- Ted
(Compile the following with -DANALYZE to see the analysis I did.)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
typedef unsigned int __u32;
typedef unsigned long long __u64;
struct fast_pool {
__u32 pool[4];
unsigned long last;
unsigned short count;
unsigned char rotate;
unsigned char last_timer_intr;
};
/**
* rol32 - rotate a 32-bit value left
* @word: value to rotate
* @shift: bits to roll
*/
static inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> (32 - shift));
}
static inline __u64 rol64(__u64 word, unsigned int shift)
{
return (word << shift) | (word >> (64 - shift));
}
static __u32 const twist_table[8] = {
0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
extern void fast_mix(struct fast_pool *f, __u32 input[4])
{
__u32 w;
unsigned input_rotate = f->rotate;
w = rol32(input[0], input_rotate) ^ f->pool[0] ^ f->pool[3];
f->pool[0] = (w >> 3) ^ twist_table[w & 7];
input_rotate = (input_rotate + 14) & 31;
w = rol32(input[1], input_rotate) ^ f->pool[1] ^ f->pool[0];
f->pool[1] = (w >> 3) ^ twist_table[w & 7];
input_rotate = (input_rotate + 7) & 31;
w = rol32(input[2], input_rotate) ^ f->pool[2] ^ f->pool[1];
f->pool[2] = (w >> 3) ^ twist_table[w & 7];
input_rotate = (input_rotate + 7) & 31;
w = rol32(input[3], input_rotate) ^ f->pool[3] ^ f->pool[2];
f->pool[3] = (w >> 3) ^ twist_table[w & 7];
input_rotate = (input_rotate + 7) & 31;
f->rotate = input_rotate;
f->count++;
}
extern void fast_mix2(struct fast_pool *f, __u32 const input[4])
{
__u32 a = f->pool[0] ^ input[0], b = f->pool[1] ^ input[1];
__u32 c = f->pool[2] ^ input[2], d = f->pool[3] ^ input[3];
int i;
for (i = 0; i < 2; i++) {
/*
* Inspired by ChaCha's QuarterRound, but
* modified for much greater parallelism.
*/
a += b; c += d;
d ^= a; b ^= c;
a = rol32(a, 15); c = rol32(c, 21);
a += b; c += d;
d ^= a; b ^= c;
a = rol32(a, 3); c = rol32(c, 7);
}
f->pool[0] = a; f->pool[1] = b;
f->pool[2] = c; f->pool[3] = d;
f->count++;
}
extern void fast_mix3(struct fast_pool *f, __u32 const input[4])
{
__u32 a = f->pool[0] ^ input[0], b = f->pool[1] ^ input[1];
__u32 c = f->pool[2] ^ input[2], d = f->pool[3] ^ input[3];
int i;
for (i = 0; i < 2; i++) {
a += b; c += d;
a = rol32(a, 6); c = rol32(c, 27);
d ^= a; b ^= c;
a += b; c += d;
a = rol32(a, 16); c = rol32(c, 14);
d ^= a; b ^= c;
}
f->pool[0] = a; f->pool[1] = b;
f->pool[2] = c; f->pool[3] = d;
f->count++;
}
extern void fast_mix4(struct fast_pool *f, __u32 const input[4])
{
__u64 a = ((__u64 *)f->pool)[0] ^ ((__u64 const *)input)[0];
__u64 b = ((__u64 *)f->pool)[1] ^ ((__u64 const *)input)[1];
int i;
for (i = 0; i < 2; i++) {
a += b; b = rol64(b, 52);
b ^= a; a = rol64(a, 10);
a += b; b = rol64(b, 47);
b ^= a; a = rol64(a, 17);
}
((__u64 *)f->pool)[0] = a;
((__u64 *)f->pool)[1] = b;
f->count++;
}
static void rotate(__u32 a[4])
{
int i;
int carry = 0;
__u32 tmp;
for (i=0; i < 4; i++) {
tmp = a[i];
a[i] = (tmp << 1) + carry;
carry = (tmp & 0x80000000) ? 1 : 0;
}
if (carry)
a[0]++;
}
int global_min = 9999;
void analyze(void)
{
struct fast_pool f;
int i, pc;
int sum = 0, max = 0, min=9999;
__u32 input[4];
__u32 start[4];
start[0] = random();
start[1] = random();
start[2] = random();
start[3] = random();
memset(&f, 0, sizeof(f));
memset(&input, 0, sizeof(input));
input[0] = 1;
for (i=0; i < 32; i++) {
memcpy(f.pool, start, sizeof(start));
fast_mix3(&f, input);
pc = (__builtin_popcount(f.pool[0] ^ start[0]) +
__builtin_popcount(f.pool[1] ^ start[1]) +
__builtin_popcount(f.pool[2] ^ start[2]) +
__builtin_popcount(f.pool[3] ^ start[3]));
sum += pc;
if (pc > max)
max = pc;
if (pc < min)
min = pc;
if (pc < global_min)
global_min = pc;
rotate(input);
// printf("%d ", pc);
}
// printf("\n");
// printf("average popcount: %d, max: %d min %d\n", sum / 128, max, min);
}
static __inline__ unsigned long long rdtsc(void)
{
unsigned long long int x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
int main(int argc, char **argv)
{
struct fast_pool f;
int i;
__u32 input[4];
unsigned volatile long long start_time, end_time;
#ifdef ANALYZE
for (i=0; i < 1024; i++)
analyze();
printf("Global minimum: %d\n", global_min);
return 0;
#endif
#if !defined(BENCH_FASTMIX) && !defined(BENCH_FASTMIX2)
for (i=0; i < 20; i++) {
usleep(50000);
start_time = rdtsc();
fast_mix2(&f, input);
end_time = rdtsc();
printf("fast_mix2: %llu\t", end_time - start_time);
#if 0
usleep(50000);
start_time = rdtsc();
fast_mix2(&f, input);
end_time = rdtsc();
printf("fast_mix2: %llu\t", end_time - start_time);
usleep(50000);
start_time = rdtsc();
fast_mix3(&f, input);
end_time = rdtsc();
printf("fast_mix3: %llu\t", end_time - start_time);
#endif
fputc('\n', stdout);
}
#endif
#ifdef BENCH_FASTMIX
for (i=0; i < 10240000; i++) {
fast_mix(&f, input);
}
#endif
#ifdef BENCH_FASTMIX2
for (i=0; i < 10240000; i++) {
fast_mix2(&f, input);
}
#endif
return 0;
}
next prev parent reply other threads:[~2014-06-15 1:17 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-09 0:05 [RFC PATCH] drivers/char/random.c: Is reducing locking range like this safe? George Spelvin
2014-06-09 1:35 ` Theodore Ts'o
2014-06-09 2:10 ` George Spelvin
2014-06-09 2:18 ` George Spelvin
2014-06-09 4:03 ` George Spelvin
2014-06-09 9:23 ` George Spelvin
2014-06-09 13:34 ` Theodore Ts'o
2014-06-09 15:04 ` George Spelvin
2014-06-09 15:50 ` Theodore Ts'o
2014-06-09 16:11 ` George Spelvin
2014-06-10 0:20 ` drivers/char/random.c: more ruminations George Spelvin
2014-06-10 1:20 ` Theodore Ts'o
2014-06-10 3:10 ` George Spelvin
2014-06-10 15:25 ` Theodore Ts'o
2014-06-10 20:40 ` George Spelvin
2014-06-10 21:20 ` Theodore Ts'o
2014-06-11 0:10 ` George Spelvin
2014-06-11 2:08 ` Theodore Ts'o
2014-06-11 3:58 ` George Spelvin
2014-06-11 13:11 ` Theodore Ts'o
2014-06-12 0:42 ` George Spelvin
2014-06-12 1:03 ` H. Peter Anvin
2014-06-11 4:34 ` George Spelvin
2014-06-11 13:09 ` Theodore Ts'o
2014-06-11 2:21 ` Theodore Ts'o
2014-06-09 13:17 ` drivers/char/random.c: More futzing about George Spelvin
2014-06-11 16:38 ` Theodore Ts'o
2014-06-11 16:48 ` H. Peter Anvin
2014-06-11 19:25 ` Theodore Ts'o
2014-06-11 20:41 ` H. Peter Anvin
2014-06-12 0:44 ` H. Peter Anvin
2014-06-12 1:51 ` George Spelvin
2014-06-12 0:32 ` George Spelvin
2014-06-12 3:22 ` Theodore Ts'o
2014-06-12 4:13 ` random: Benchamrking fast_mix2 George Spelvin
2014-06-12 11:18 ` George Spelvin
2014-06-12 20:17 ` Theodore Ts'o
2014-06-12 20:46 ` Theodore Ts'o
2014-06-13 0:23 ` George Spelvin
2014-06-13 15:52 ` Theodore Ts'o
2014-06-14 2:10 ` George Spelvin
2014-06-14 3:06 ` Theodore Ts'o
2014-06-14 5:25 ` George Spelvin
2014-06-14 6:24 ` Theodore Ts'o
2014-06-14 8:03 ` George Spelvin
2014-06-14 11:14 ` George Spelvin
2014-06-14 15:13 ` George Spelvin
2014-06-14 16:33 ` Theodore Ts'o
2014-06-15 0:23 ` George Spelvin
2014-06-15 1:17 ` Theodore Ts'o [this message]
2014-06-15 6:58 ` George Spelvin
2014-06-15 13:01 ` Theodore Ts'o
2014-06-14 6:27 ` Theodore Ts'o
2014-06-14 4:55 ` [RFC] random: is the IRQF_TIMER test working as intended? George Spelvin
2014-06-14 6:43 ` Theodore Ts'o
2014-06-14 7:23 ` George Spelvin
2014-06-12 3:43 ` drivers/char/random.c: More futzing about George Spelvin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140615011713.GK6447@thunk.org \
--to=tytso@mit.edu \
--cc=hpa@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux@horizon.com \
--cc=mingo@kernel.org \
--cc=price@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.