Linux cryptographic layer development
 help / color / mirror / Atom feed
* [PATCH v6 5/5] syncookies: use SipHash in place of SHA1
From: Jason A. Donenfeld @ 2016-12-16  3:03 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, Linus Torvalds, Eric Biggers,
	Tom Herbert, George Spelvin, Vegard Nossum, ak, davem, luto
  Cc: Jason A. Donenfeld
In-Reply-To: <20161216030328.11602-1-Jason@zx2c4.com>

SHA1 is slower and less secure than SipHash, and so replacing syncookie
generation with SipHash makes natural sense. Some BSDs have been doing
this for several years in fact.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 net/ipv4/syncookies.c | 20 ++++----------------
 net/ipv6/syncookies.c | 37 ++++++++++++++++---------------------
 2 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3e88467d70ee..03bb068f8888 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -13,13 +13,13 @@
 #include <linux/tcp.h>
 #include <linux/slab.h>
 #include <linux/random.h>
-#include <linux/cryptohash.h>
+#include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <net/tcp.h>
 #include <net/route.h>
 
-static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
+static siphash_key_t syncookie_secret[2] __read_mostly;
 
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -48,24 +48,12 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
 #define TSBITS	6
 #define TSMASK	(((__u32)1 << TSBITS) - 1)
 
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch);
-
 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 		       u32 count, int c)
 {
-	__u32 *tmp;
-
 	net_get_random_once(syncookie_secret, sizeof(syncookie_secret));
-
-	tmp  = this_cpu_ptr(ipv4_cookie_scratch);
-	memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
-	tmp[0] = (__force u32)saddr;
-	tmp[1] = (__force u32)daddr;
-	tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
-	tmp[3] = count;
-	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
-
-	return tmp[17];
+	return siphash_4u32(saddr, daddr, (u32)sport << 16 | dport, count,
+			    syncookie_secret[c]);
 }
 
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index a4d49760bf43..04d19e89a3e0 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -16,7 +16,7 @@
 
 #include <linux/tcp.h>
 #include <linux/random.h>
-#include <linux/cryptohash.h>
+#include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
@@ -24,7 +24,7 @@
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
 
-static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
+static siphash_key_t syncookie6_secret[2] __read_mostly;
 
 /* RFC 2460, Section 8.3:
  * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
@@ -41,30 +41,25 @@ static __u16 const msstab[] = {
 	9000 - 60,
 };
 
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch);
-
 static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
 		       __be16 sport, __be16 dport, u32 count, int c)
 {
-	__u32 *tmp;
+	const struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		u32 count;
+		u16 sport;
+		u16 dport;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *saddr,
+		.daddr = *daddr,
+		.count = count,
+		.sport = sport,
+		.dport = dport
+	};
 
 	net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
-
-	tmp  = this_cpu_ptr(ipv6_cookie_scratch);
-
-	/*
-	 * we have 320 bits of information to hash, copy in the remaining
-	 * 192 bits required for sha_transform, from the syncookie6_secret
-	 * and overwrite the digest with the secret
-	 */
-	memcpy(tmp + 10, syncookie6_secret[c], 44);
-	memcpy(tmp, saddr, 16);
-	memcpy(tmp + 4, daddr, 16);
-	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
-	tmp[9] = count;
-	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
-
-	return tmp[17];
+	return siphash(&combined, sizeof(combined), syncookie6_secret[c]);
 }
 
 static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
-- 
2.11.0

^ permalink raw reply related

* [PATCH v6 4/5] md5: remove from lib and only live in crypto
From: Jason A. Donenfeld @ 2016-12-16  3:03 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, Linus Torvalds, Eric Biggers,
	Tom Herbert, George Spelvin, Vegard Nossum, ak, davem, luto
  Cc: Jason A. Donenfeld
In-Reply-To: <20161216030328.11602-1-Jason@zx2c4.com>

The md5_transform function is no longer used anywhere in the tree,
except for the crypto api's actual implementation of md5, so we can drop
the function from lib and put it as a static function of the crypto
file, where it belongs. There should be no new users of md5_transform,
anyway, since there are more modern ways of doing what it once achieved.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 crypto/md5.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/Makefile |  2 +-
 lib/md5.c    | 95 ------------------------------------------------------------
 3 files changed, 95 insertions(+), 97 deletions(-)
 delete mode 100644 lib/md5.c

diff --git a/crypto/md5.c b/crypto/md5.c
index 2355a7c25c45..f7ae1a48225b 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -21,9 +21,11 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/cryptohash.h>
 #include <asm/byteorder.h>
 
+#define MD5_DIGEST_WORDS 4
+#define MD5_MESSAGE_BYTES 64
+
 const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = {
 	0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
 	0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
@@ -47,6 +49,97 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
 	}
 }
 
+#define F1(x, y, z)	(z ^ (x & (y ^ z)))
+#define F2(x, y, z)	F1(z, x, y)
+#define F3(x, y, z)	(x ^ y ^ z)
+#define F4(x, y, z)	(y ^ (x | ~z))
+
+#define MD5STEP(f, w, x, y, z, in, s) \
+	(w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
+
+static void md5_transform(__u32 *hash, __u32 const *in)
+{
+	u32 a, b, c, d;
+
+	a = hash[0];
+	b = hash[1];
+	c = hash[2];
+	d = hash[3];
+
+	MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+	MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+	MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+	MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+	MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+	MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+	MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+	MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+	MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+	MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+	MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+	MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+	MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+	MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+	MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+	MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+	MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+	MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+	MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+	MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+	MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+	MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+	MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+	MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+	MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+	MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+	MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+	MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+	MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+	MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+	MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+	MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+	MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+	MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+	MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+	MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+	MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+	MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+	MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+	MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+	MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+	MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+	MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+	MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+	MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+	MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+	MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+	MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+	MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+	MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+	MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+	MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+	MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+	MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+	MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+	MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+	MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+	MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+	MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+	MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+	MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+	MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+	MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+	MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+	hash[0] += a;
+	hash[1] += b;
+	hash[2] += c;
+	hash[3] += d;
+}
+
 static inline void md5_transform_helper(struct md5_state *ctx)
 {
 	le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(u32));
diff --git a/lib/Makefile b/lib/Makefile
index 71d398b04a74..1079152607e0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
-	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
+	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o siphash.o \
diff --git a/lib/md5.c b/lib/md5.c
deleted file mode 100644
index bb0cd01d356d..000000000000
--- a/lib/md5.c
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <linux/compiler.h>
-#include <linux/export.h>
-#include <linux/cryptohash.h>
-
-#define F1(x, y, z)	(z ^ (x & (y ^ z)))
-#define F2(x, y, z)	F1(z, x, y)
-#define F3(x, y, z)	(x ^ y ^ z)
-#define F4(x, y, z)	(y ^ (x | ~z))
-
-#define MD5STEP(f, w, x, y, z, in, s) \
-	(w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
-
-void md5_transform(__u32 *hash, __u32 const *in)
-{
-	u32 a, b, c, d;
-
-	a = hash[0];
-	b = hash[1];
-	c = hash[2];
-	d = hash[3];
-
-	MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
-	MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
-	MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
-	MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
-	MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
-	MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
-	MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
-	MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
-	MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
-	MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
-	MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
-	MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
-	MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
-	MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
-	MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
-	MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
-
-	MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
-	MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
-	MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
-	MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
-	MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
-	MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
-	MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
-	MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
-	MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
-	MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
-	MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
-	MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
-	MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
-	MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
-	MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
-	MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
-
-	MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
-	MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
-	MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
-	MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
-	MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
-	MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
-	MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
-	MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
-	MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
-	MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
-	MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
-	MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
-	MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
-	MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
-	MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
-	MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
-
-	MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
-	MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
-	MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
-	MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
-	MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
-	MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
-	MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
-	MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
-	MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
-	MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
-	MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
-	MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
-	MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
-	MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
-	MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
-	MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
-
-	hash[0] += a;
-	hash[1] += b;
-	hash[2] += c;
-	hash[3] += d;
-}
-EXPORT_SYMBOL(md5_transform);
-- 
2.11.0

^ permalink raw reply related

* [PATCH v6 3/5] random: use SipHash in place of MD5
From: Jason A. Donenfeld @ 2016-12-16  3:03 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, Linus Torvalds, Eric Biggers,
	Tom Herbert, George Spelvin, Vegard Nossum, ak, davem, luto
  Cc: Jason A. Donenfeld, Jean-Philippe Aumasson
In-Reply-To: <20161216030328.11602-1-Jason@zx2c4.com>

This duplicates the current algorithm for get_random_int/long, but uses
siphash instead. This comes with several benefits. It's certainly
faster and more cryptographically secure than MD5. This patch also
separates hashed fields into three values instead of one, in order to
increase diffusion.

The previous MD5 algorithm used a per-cpu MD5 state, which caused
successive calls to the function to chain upon each other. While it's
not entirely clear that this kind of chaining is absolutely necessary
when using a secure PRF like siphash, it can't hurt, and the timing of
the call chain does add a degree of natural entropy. So, in keeping with
this design, instead of the massive per-cpu 64-byte MD5 state, there is
instead a per-cpu previously returned value for chaining.

The speed benefits are substantial:

                | siphash | md5    | speedup |
                ------------------------------
get_random_long | 137130  | 415983 | 3.03x   |
get_random_int  | 86384   | 343323 | 3.97x   |

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Ted Tso <tytso@mit.edu>
---
 drivers/char/random.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d6876d506220..a51f0ff43f00 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -262,6 +262,7 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
+#include <linux/siphash.h>
 #include <crypto/chacha20.h>
 
 #include <asm/processor.h>
@@ -2042,7 +2043,7 @@ struct ctl_table random_table[] = {
 };
 #endif 	/* CONFIG_SYSCTL */
 
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+static siphash_key_t random_int_secret;
 
 int random_int_secret_init(void)
 {
@@ -2050,8 +2051,7 @@ int random_int_secret_init(void)
 	return 0;
 }
 
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
-		__aligned(sizeof(unsigned long));
+static DEFINE_PER_CPU(u64, get_random_int_chaining);
 
 /*
  * Get a random word for internal kernel use only. Similar to urandom but
@@ -2061,19 +2061,16 @@ static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
  */
 unsigned int get_random_int(void)
 {
-	__u32 *hash;
 	unsigned int ret;
+	u64 *chaining;
 
 	if (arch_get_random_int(&ret))
 		return ret;
 
-	hash = get_cpu_var(get_random_int_hash);
-
-	hash[0] += current->pid + jiffies + random_get_entropy();
-	md5_transform(hash, random_int_secret);
-	ret = hash[0];
-	put_cpu_var(get_random_int_hash);
-
+	chaining = &get_cpu_var(get_random_int_chaining);
+	ret = *chaining = siphash_3u64(*chaining, jiffies, random_get_entropy() +
+				       current->pid, random_int_secret);
+	put_cpu_var(get_random_int_chaining);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_int);
@@ -2083,19 +2080,16 @@ EXPORT_SYMBOL(get_random_int);
  */
 unsigned long get_random_long(void)
 {
-	__u32 *hash;
 	unsigned long ret;
+	u64 *chaining;
 
 	if (arch_get_random_long(&ret))
 		return ret;
 
-	hash = get_cpu_var(get_random_int_hash);
-
-	hash[0] += current->pid + jiffies + random_get_entropy();
-	md5_transform(hash, random_int_secret);
-	ret = *(unsigned long *)hash;
-	put_cpu_var(get_random_int_hash);
-
+	chaining = &get_cpu_var(get_random_int_chaining);
+	ret = *chaining = siphash_3u64(*chaining, jiffies, random_get_entropy() +
+				       current->pid, random_int_secret);
+	put_cpu_var(get_random_int_chaining);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_long);
-- 
2.11.0

^ permalink raw reply related

* [PATCH v6 1/5] siphash: add cryptographically secure PRF
From: Jason A. Donenfeld @ 2016-12-16  3:03 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, Linus Torvalds, Eric Biggers,
	Tom Herbert, George Spelvin, Vegard Nossum, ak, davem, luto
  Cc: Jason A. Donenfeld, Jean-Philippe Aumasson
In-Reply-To: <20161216030328.11602-1-Jason@zx2c4.com>

SipHash is a 64-bit keyed hash function that is actually a
cryptographically secure PRF, like HMAC. Unlike HMAC, though, SipHash is
very fast, and is meant to be used as a hashtable keyed lookup function,
or as a general PRF for short-input use cases, such as sequence numbers
or RNG chaining.

For the first usage:

There are a variety of attacks known as "hashtable poisoning" in which an
attacker forms some data such that the hash of that data will be the
same, and then proceeds to fill up all entries of a hashbucket. This is
a realistic and well-known denial-of-service vector. Currently
hashtables use jhash, which is fast but not secure, and some kind of
rotating key scheme (or none at all, which isn't good). SipHash is meant
as a replacement for jhash in these cases.

There are a handful of places in the kernel that are vulnerable to
hashtable poisoning attacks, either via userspace vectors or network
vectors, and there's not a reliable mechanism inside the kernel at the
moment to fix it. The first step toward fixing these issues is actually
getting a secure primitive into the kernel for developers to use. Then
we can, bit by bit, port things over to it as deemed appropriate.

While SipHash is extremely fast for a cryptographically secure function,
it is likely a bit slower than the insecure jhash, and so replacements
will be evaluated on a case-by-case basis based on whether or not the
difference in speed is negligible and whether or not the current jhash usage
poses a real security risk.

For the second usage:

A few places in the kernel are using MD5 or SHA1 for creating secure
sequence numbers, syn cookies, port numbers, or fast random numbers.
SipHash is a faster, more fitting, and more secure replacement for MD5
in those situations. Replacing MD5 and SHA1 with SipHash for these uses is
obvious and straight-forward, and so is submitted along with this patch
series. There shouldn't be much of a debate over its efficacy.

Dozens of languages are already using this internally for their hash
tables and PRFs. Some of the BSDs already use this in their kernels.
SipHash is a widely known high-speed solution to a widely known set of
problems, and it's time we catch up.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
---
 MAINTAINERS             |   7 ++
 include/linux/siphash.h |  86 ++++++++++++++++++++
 lib/Kconfig.debug       |   6 +-
 lib/Makefile            |   5 +-
 lib/siphash.c           | 210 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/test_siphash.c      | 101 +++++++++++++++++++++++
 6 files changed, 410 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/siphash.h
 create mode 100644 lib/siphash.c
 create mode 100644 lib/test_siphash.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 59c9895d73d5..5d87a8c1056a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11231,6 +11231,13 @@ F:	arch/arm/mach-s3c24xx/mach-bast.c
 F:	arch/arm/mach-s3c24xx/bast-ide.c
 F:	arch/arm/mach-s3c24xx/bast-irq.c
 
+SIPHASH PRF ROUTINES
+M:	Jason A. Donenfeld <Jason@zx2c4.com>
+S:	Maintained
+F:	lib/siphash.c
+F:	lib/test_siphash.c
+F:	include/linux/siphash.h
+
 TI DAVINCI MACHINE SUPPORT
 M:	Sekhar Nori <nsekhar@ti.com>
 M:	Kevin Hilman <khilman@kernel.org>
diff --git a/include/linux/siphash.h b/include/linux/siphash.h
new file mode 100644
index 000000000000..e82fce48a0f6
--- /dev/null
+++ b/include/linux/siphash.h
@@ -0,0 +1,86 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4.
+ */
+
+#ifndef _LINUX_SIPHASH_H
+#define _LINUX_SIPHASH_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+#define SIPHASH_ALIGNMENT 8
+typedef u64 siphash_key_t[2];
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t key);
+#endif
+
+u64 siphash_1u64(const u64 a, const siphash_key_t key);
+u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t key);
+u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
+		 const siphash_key_t key);
+u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
+		 const siphash_key_t key);
+
+static inline u64 ___siphash_aligned(const u64 *data, size_t len, const siphash_key_t key)
+{
+	if (__builtin_constant_p(len) && len == 8)
+		return siphash_1u64(data[0], key);
+	if (__builtin_constant_p(len) && len == 16)
+		return siphash_2u64(data[0], data[1], key);
+	if (__builtin_constant_p(len) && len == 24)
+		return siphash_3u64(data[0], data[1], data[2], key);
+	if (__builtin_constant_p(len) && len == 32)
+		return siphash_4u64(data[0], data[1], data[2], data[3], key);
+	return __siphash_aligned(data, len, key);
+}
+
+/**
+ * siphash - compute 64-bit siphash PRF value
+ * @data: buffer to hash
+ * @size: size of @data
+ * @key: the siphash key
+ */
+static inline u64 siphash(const void *data, size_t len, const siphash_key_t key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+		return __siphash_unaligned(data, len, key);
+#endif
+	return ___siphash_aligned(data, len, key);
+}
+
+static inline u64 siphash_2u32(const u32 a, const u32 b, const siphash_key_t key)
+{
+	return siphash_1u64((u64)b << 32 | a, key);
+}
+
+static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
+			       const siphash_key_t key)
+{
+	return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
+}
+
+static inline u64 siphash_6u32(const u32 a, const u32 b, const u32 c, const u32 d,
+			       const u32 e, const u32 f, const siphash_key_t key)
+{
+	return siphash_3u64((u64)b << 32 | a, (u64)d << 32 | c, (u64)f << 32 | e,
+			    key);
+}
+
+static inline u64 siphash_8u32(const u32 a, const u32 b, const u32 c, const u32 d,
+			       const u32 e, const u32 f, const u32 g, const u32 h,
+			       const siphash_key_t key)
+{
+	return siphash_4u64((u64)b << 32 | a, (u64)d << 32 | c, (u64)f << 32 | e,
+			    (u64)h << 32 | g, key);
+}
+
+#endif /* _LINUX_SIPHASH_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7446097f72bd..86254ea99b45 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1843,9 +1843,9 @@ config TEST_HASH
 	tristate "Perform selftest on hash functions"
 	default n
 	help
-	  Enable this option to test the kernel's integer (<linux/hash,h>)
-	  and string (<linux/stringhash.h>) hash functions on boot
-	  (or module load).
+	  Enable this option to test the kernel's integer (<linux/hash.h>),
+	  string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
+	  hash functions on boot (or module load).
 
 	  This is intended to help people writing architecture-specific
 	  optimized versions.  If unsure, say N.
diff --git a/lib/Makefile b/lib/Makefile
index 50144a3aeebd..71d398b04a74 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
+	 earlycpio.o seq_buf.o siphash.o \
+	 nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/siphash.c b/lib/siphash.c
new file mode 100644
index 000000000000..7efc273de5d0
--- /dev/null
+++ b/lib/siphash.c
@@ -0,0 +1,210 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4.
+ */
+
+#include <linux/siphash.h>
+#include <asm/unaligned.h>
+
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+#include <linux/dcache.h>
+#include <asm/word-at-a-time.h>
+#endif
+
+#define SIPROUND \
+	do { \
+	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+	v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+	} while(0)
+
+#define PREAMBLE(len) \
+	u64 v0 = 0x736f6d6570736575ULL; \
+	u64 v1 = 0x646f72616e646f6dULL; \
+	u64 v2 = 0x6c7967656e657261ULL; \
+	u64 v3 = 0x7465646279746573ULL; \
+	u64 b = ((u64)len) << 56; \
+	v3 ^= key[1]; \
+	v2 ^= key[0]; \
+	v1 ^= key[1]; \
+	v0 ^= key[0];
+
+#define POSTAMBLE \
+	v3 ^= b; \
+	SIPROUND; \
+	SIPROUND; \
+	v0 ^= b; \
+	v2 ^= 0xff; \
+	SIPROUND; \
+	SIPROUND; \
+	SIPROUND; \
+	SIPROUND; \
+	return (v0 ^ v1) ^ (v2 ^ v3);
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	PREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = le64_to_cpup(data);
+		v3 ^= m;
+		SIPROUND;
+		SIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= le32_to_cpup(data); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= le16_to_cpup(data); break;
+	case 1: b |= end[0];
+	}
+#endif
+	POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	PREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = get_unaligned_le64(data);
+		v3 ^= m;
+		SIPROUND;
+		SIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= get_unaligned_le32(end); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= get_unaligned_le16(end); break;
+	case 1: b |= end[0];
+	}
+#endif
+	POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_unaligned);
+#endif
+
+/**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+ * @first: first u64
+ * @key: the siphash key
+ */
+u64 siphash_1u64(const u64 first, const siphash_key_t key)
+{
+	PREAMBLE(8)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u64);
+
+/**
+ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
+ * @first: first u64
+ * @second: second u64
+ * @key: the siphash key
+ */
+u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t key)
+{
+	PREAMBLE(16)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= second;
+	POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_2u64);
+
+/**
+ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @key: the siphash key
+ */
+u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
+		 const siphash_key_t key)
+{
+	PREAMBLE(24)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= third;
+	POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u64);
+
+/**
+ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @forth: fourth u64
+ * @key: the siphash key
+ */
+u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
+		 const u64 forth, const siphash_key_t key)
+{
+	PREAMBLE(32)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= third;
+	v3 ^= forth;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= forth;
+	POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_4u64);
diff --git a/lib/test_siphash.c b/lib/test_siphash.c
new file mode 100644
index 000000000000..906e58a2c946
--- /dev/null
+++ b/lib/test_siphash.c
@@ -0,0 +1,101 @@
+/* Test cases for siphash.c
+ *
+ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/siphash.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+/* Test vectors taken from official reference source available at:
+ *     https://131002.net/siphash/siphash24.c
+ */
+static const u64 test_vectors[64] = { /* [i]: expected hash of the i-byte input 0, 1, ..., i-1 under test_key */
+	0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL, 0x0d6c8009d9a94f5aULL,
+	0x85676696d7fb7e2dULL, 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL,
+	0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL, 0x93f5f5799a932462ULL,
+	0x9e0082df0ba9e4b0ULL, 0x7a5dbbc594ddb9f3ULL, 0xf4b32f46226bada7ULL,
+	0x751e8fbc860ee5fbULL, 0x14ea5627c0843d90ULL, 0xf723ca908e7af2eeULL,
+	0xa129ca6149be45e5ULL, 0x3f2acc7f57c29bdbULL, 0x699ae9f52cbe4794ULL,
+	0x4bc1b3f0968dd39cULL, 0xbb6dc91da77961bdULL, 0xbed65cf21aa2ee98ULL,
+	0xd0f2cbb02e3b67c7ULL, 0x93536795e3a33e88ULL, 0xa80c038ccd5ccec8ULL,
+	0xb8ad50c6f649af94ULL, 0xbce192de8a85b8eaULL, 0x17d835b85bbb15f3ULL,
+	0x2f2e6163076bcfadULL, 0xde4daaaca71dc9a5ULL, 0xa6a2506687956571ULL,
+	0xad87a3535c49ef28ULL, 0x32d892fad841c342ULL, 0x7127512f72f27cceULL,
+	0xa7f32346f95978e3ULL, 0x12e0b01abb051238ULL, 0x15e034d40fa197aeULL,
+	0x314dffbe0815a3b4ULL, 0x027990f029623981ULL, 0xcadcd4e59ef40c4dULL,
+	0x9abfd8766a33735cULL, 0x0e3ea96b5304a7d0ULL, 0xad0c42d6fc585992ULL,
+	0x187306c89bc215a9ULL, 0xd4a60abcf3792b95ULL, 0xf935451de4f21df2ULL,
+	0xa9538f0419755787ULL, 0xdb9acddff56ca510ULL, 0xd06c98cd5c0975ebULL,
+	0xe612a3cb9ecba951ULL, 0xc766e62cfcadaf96ULL, 0xee64435a9752fe72ULL,
+	0xa192d576b245165aULL, 0x0a8787bf8ecb74b2ULL, 0x81b3e73d20b49b6fULL,
+	0x7fa8220ba3b2eceaULL, 0x245731c13ca42499ULL, 0xb78dbfaf3a8d83bdULL,
+	0xea1ad565322a1a0bULL, 0x60e61c23a3795013ULL, 0x6606d7e446282b93ULL,
+	0x6ca4ecb15c5f91e1ULL, 0x9f626da15c9625f3ULL, 0xe51b38608ef25f57ULL,
+	0x958a324ceb064572ULL
+};
+static const siphash_key_t test_key = /* two u64s whose little-endian bytes are 00..07 and 08..0f */
+	{ 0x0706050403020100ULL , 0x0f0e0d0c0b0a0908ULL };
+
+static int __init siphash_test_init(void) /* runs every self-test; returns 0 or -EINVAL */
+{
+	u8 in[64] __aligned(SIPHASH_ALIGNMENT);
+	u8 in_unaligned[65]; /* payload starts at [1]; see in_unaligned + 1 below */
+	u8 i;
+	int ret = 0;
+
+	for (i = 0; i < 64; ++i) {
+		in[i] = i;
+		in_unaligned[i + 1] = i;
+		if (siphash(in, i, test_key) != test_vectors[i]) { /* hashes the first i bytes: 0 .. i-1 */
+			pr_info("self-test aligned %u: FAIL\n", i + 1); /* prints i + 1, not the hashed length i */
+			ret = -EINVAL; /* record the failure but keep testing */
+		}
+		if (siphash(in_unaligned + 1, i, test_key) != test_vectors[i]) { /* same bytes, buffer offset by one */
+			pr_info("self-test unaligned %u: FAIL\n", i + 1);
+			ret = -EINVAL;
+		}
+	}
+	if (siphash_1u64(0x0706050403020100ULL, test_key) != test_vectors[8]) { /* Nu64 helpers are checked against the 8/16/24/32-byte vectors */
+		pr_info("self-test 1u64: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, test_key) != test_vectors[16]) {
+		pr_info("self-test 2u64: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+			 0x1716151413121110ULL, test_key) != test_vectors[24]) {
+		pr_info("self-test 3u64: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+			 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, test_key) != test_vectors[32]) {
+		pr_info("self-test 4u64: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (!ret) /* only reached with ret == 0 when no check above failed */
+		pr_info("self-tests: pass\n");
+	return ret;
+}
+
+static void __exit siphash_test_exit(void)
+{ /* intentionally empty: siphash_test_init() above allocates and registers nothing */
+}
+
+module_init(siphash_test_init);
+module_exit(siphash_test_exit);
+
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_LICENSE("Dual BSD/GPL");
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH v5 1/4] siphash: add cryptographically secure PRF
From: kbuild test robot @ 2016-12-16  2:14 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: kbuild-all, Netdev, kernel-hardening, LKML, linux-crypto,
	David Laight, Ted Tso, Hannes Frederic Sowa, Linus Torvalds,
	Eric Biggers, Tom Herbert, George Spelvin, Vegard Nossum, ak,
	davem, luto, Jason A. Donenfeld, Jean-Philippe Aumasson,
	Daniel J . Bernstein
In-Reply-To: <20161215203003.31989-2-Jason@zx2c4.com>

[-- Attachment #1: Type: text/plain, Size: 1530 bytes --]

Hi Jason,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.9 next-20161215]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Jason-A-Donenfeld/siphash-add-cryptographically-secure-PRF/20161216-092837
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 6.2.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   lib/siphash.c: In function 'siphash_unaligned':
>> lib/siphash.c:123:15: error: 'bytes' undeclared (first use in this function)
     case 1: b |= bytes[0];
                  ^~~~~
   lib/siphash.c:123:15: note: each undeclared identifier is reported only once for each function it appears in

vim +/bytes +123 lib/siphash.c

   117		case 7: b |= ((u64)end[6]) << 48;
   118		case 6: b |= ((u64)end[5]) << 40;
   119		case 5: b |= ((u64)end[4]) << 32;
   120		case 4: b |= get_unaligned_le32(end); break;
   121		case 3: b |= ((u64)end[2]) << 16;
   122		case 2: b |= get_unaligned_le16(end); break;
 > 123		case 1: b |= bytes[0];
   124		}
   125	#endif
   126		v3 ^= b;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 45664 bytes --]

^ permalink raw reply

* RE: [virtio-dev] Re: [Qemu-devel] [PATCH v7 1/1] crypto: add virtio-crypto driver
From: Gonglei (Arei) @ 2016-12-16  0:40 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Zeng, Xin, Halil Pasic, linux-kernel@vger.kernel.org,
	qemu-devel@nongnu.org, virtio-dev@lists.oasis-open.org,
	virtualization@lists.linux-foundation.org,
	linux-crypto@vger.kernel.org, Huangweidong (C), Claudio Fontana,
	Luonengjun, Hanweidong (Randy), Xuquan (Quan Xu),
	Wanzongshun (Vincent), stefanha@redhat.com, Zhoujian (jay, Euler)
In-Reply-To: <20161215184153-mutt-send-email-mst@kernel.org>

Hi Michael,

>
> >
> >
> > > Subject: RE: [Qemu-devel] [PATCH v7 1/1] crypto: add virtio-crypto driver
> > >
> > > On Thursday, December 15, 2016 8:45 AM, Gonglei (Arei) Wrote:
> > > < > > diff --git a/drivers/crypto/virtio/virtio_crypto_core.c
> > > < > b/drivers/crypto/virtio/virtio_crypto_core.c
> > > < > > new file mode 100644
> > > < > > index 0000000..c0854a1
> > > < > > --- /dev/null
> > > < > > +++ b/drivers/crypto/virtio/virtio_crypto_core.c
> > > < > > @@ -0,0 +1,474 @@
> > > < > [..]
> > > < > > +
> > > < > > +static void virtcrypto_dataq_callback(struct virtqueue *vq)
> > > < > > +{
> > > < > > +	struct virtio_crypto *vcrypto = vq->vdev->priv;
> > > < > > +	struct virtio_crypto_request *vc_req;
> > > < > > +	unsigned long flags;
> > > < > > +	unsigned int len;
> > > < > > +	struct ablkcipher_request *ablk_req;
> > > < > > +	int error;
> > > < > > +
> > > < > > +	spin_lock_irqsave(&vcrypto->lock, flags);
> > > < >
> > > < > Would it make sense to use a per virtqueue lock
> > > < > like in virtio_blk for example instead of locking on the whole
> > > < > device? OK, it seems you use only one dataqueue, so it
> > > < > may not be that relevant.
> > > < >
> > > < Currently yes, both the backend device (cryptodev-backend-builtin)
> > > < and the frontend driver use one dataqueue.
> > > <
> > >
> > > I think it makes sense to use per virtqueue lock here though it only uses one
> > > queue so far,
> > > but in the spec we already have multi queues support.
> > >
> > Yes, I agree. Will do that in V8 soon.
> > Hope to catch up with Michael's pull request for 4.10.
> >
> > Regards,
> > -Gonglei
> 
> I merged v7, this change will have to wait. Sorry.
> 
That's OK. Thanks!

I can post a separate patch after this pull request. 

Regards,
-Gonglei

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Hannes Frederic Sowa @ 2016-12-16  0:03 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <CAHmME9rF0pKxfc3p9ThD3LZU2+ZNPeR-4=9MVEE3AJQKBMNA+w@mail.gmail.com>

On 16.12.2016 00:43, Jason A. Donenfeld wrote:
> Hi Hannes,
> 
> Good news.
> 
> On Thu, Dec 15, 2016 at 10:45 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>>> How's that sound?
>>
>> I am still very much concerned about the API.
> 
> Thanks for pushing me and putting up with my daftness... the constant
> folding works absolutely perfectly. I've run several tests. When gcc
> knows that a struct is aligned (say, via __aligned(8)), then it erases
> the branch and makes a direct jump to the aligned code. When it's
> uncertain, it evaluates at runtime. So, now there is a single
> siphash() function that chooses the best one automatically. Behind the
> scene there's siphash_aligned and siphash_unaligned, but nobody needs
> to call these directly. (Should I rename these to have a double
> underscore prefix?) On platforms that have
> CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, of course all of this
> disappears and everything goes directly to the aligned version.
> 
> So, I think this assuages your concerns entirely. A single API entry
> point that does the right thing.
> 
> Whew! Good thinking, and thanks again for the suggestion.

Awesome, thanks for trying this out. This basically resolves my concern
API-wise so far.

Hannes out. ;)

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Jason A. Donenfeld @ 2016-12-15 23:47 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <e574d97f-e0b4-f5e2-6d60-88d3ce185249@stressinduktion.org>

On Thu, Dec 15, 2016 at 10:45 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> By the way, if you target net-next, it is currently closed. So no need
> to hurry.

Honestly I have no idea what I'm targeting. The hash function touches
lib/. The secure_seq stuff touches net/. The rng stuff touches
random.c. Shall this be for net-next? For lib-next (doesn't exist)?
For tytso-next? Since a lot of feedback has come from netdev people, I
suspect net-next is the correct answer. In that case, I'll ask Ted for
his sign-off to touch random.c, and then we'll get this queued up in
net-next. Please correct me if this doesn't actually resemble how
things work around here...

Jason

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Jason A. Donenfeld @ 2016-12-15 23:43 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <e574d97f-e0b4-f5e2-6d60-88d3ce185249@stressinduktion.org>

Hi Hannes,

Good news.

On Thu, Dec 15, 2016 at 10:45 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>> How's that sound?
>
> I am still very much concerned about the API.

Thanks for pushing me and putting up with my daftness... the constant
folding works absolutely perfectly. I've run several tests. When gcc
knows that a struct is aligned (say, via __aligned(8)), then it erases
the branch and makes a direct jump to the aligned code. When it's
uncertain, it evaluates at runtime. So, now there is a single
siphash() function that chooses the best one automatically. Behind the
scene there's siphash_aligned and siphash_unaligned, but nobody needs
to call these directly. (Should I rename these to have a double
underscore prefix?) On platforms that have
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, of course all of this
disappears and everything goes directly to the aligned version.

So, I think this assuages your concerns entirely. A single API entry
point that does the right thing.

Whew! Good thinking, and thanks again for the suggestion.

Jason

^ permalink raw reply

* Re: [PATCH v5 1/4] siphash: add cryptographically secure PRF
From: George Spelvin @ 2016-12-15 23:28 UTC (permalink / raw)
  To: ak, davem, David.Laight, ebiggers3, hannes, Jason,
	jeanphilippe.aumasson, kernel-hardening, linux-crypto,
	linux-kernel, linux, luto, netdev, tom, torvalds, tytso,
	vegard.nossum
  Cc: djb
In-Reply-To: <CAGiyFdfmiCMyHvAg=5sGh8KjBBrF0Wb4Qf=JLzJqUAx4yFSS3Q@mail.gmail.com>

> If a halved version of SipHash can bring significant performance boost
> (with 32b words instead of 64b words) with an acceptable security level
> (64-bit enough?) then we may design such a version.

I was thinking if the key could be pushed to 80 bits, that would be nice,
but honestly 64 bits is fine.  This is DoS protection, and while it's
possible to brute-force a 64 bit secret, there are more effective (DDoS)
attacks possible for the same cost.

(I'd suggest a name of "HalfSipHash" to convey the reduced security
effectively.)

> Regarding output size, are 64 bits sufficient?

As a replacement for jhash, 32 bits are sufficient.  It's for
indexing an in-memory hash table on a 32-bit machine.


(When you're done thinking about this, as a matter of personal interest
I'd love a hash expert's opinion on
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=2a18da7a9c7886f1c7307f8d3f23f24318583f03
and
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8387ff2577eb9ed245df9a39947f66976c6bcd02
which is a non-cryptographic hash function of novel design that's
inspired by SipHash.)

^ permalink raw reply

* Re: [PATCH v5 1/4] siphash: add cryptographically secure PRF
From: Jean-Philippe Aumasson @ 2016-12-15 23:00 UTC (permalink / raw)
  To: George Spelvin, ak, davem, David.Laight, ebiggers3, hannes, Jason,
	kernel-hardening, linux-crypto, linux-kernel, luto, netdev, tom,
	torvalds, tytso, vegard.nossum
  Cc: djb
In-Reply-To: <20161215224224.21447.qmail@ns.sciencehorizons.net>

[-- Attachment #1: Type: text/plain, Size: 4109 bytes --]

If a halved version of SipHash can bring significant performance boost
(with 32b words instead of 64b words) with an acceptable security level
(64-bit enough?) then we may design such a version.

Regarding output size, are 64 bits sufficient?
On Thu, 15 Dec 2016 at 23:42, George Spelvin <linux@sciencehorizons.net>
wrote:

> > While SipHash is extremely fast for a cryptographically secure function,
> > it is likely a tiny bit slower than the insecure jhash, and so
> replacements
> > will be evaluated on a case-by-case basis based on whether or not the
> > difference in speed is negligible and whether or not the current jhash
> usage
> > poses a real security risk.
>
> To quantify that, jhash is 27 instructions per 12 bytes of input, with a
> dependency path length of 13 instructions.  (24/12 in __jhash_mix, plus
> 3/1 for adding the input to the state.) The final add + __jhash_final
> is 24 instructions with a path length of 15, which is close enough for
> this handwaving.  Call it 18n instructions and 8n cycles for 8n bytes.
>
> SipHash (on a 64-bit machine) is 14 instructions with a dependency path
> length of 4 *per round*.  Two rounds per 8 bytes, plus two adds
> and one cycle per input word, plus four rounds to finish makes 30n+46
> instructions and 9n+16 cycles for 8n bytes.
>
> So *if* you have a 64-bit 4-way superscalar machine, it's not that much
> slower once it gets going, but the four-round finalization is quite
> noticeable for short inputs.
>
> For typical kernel input lengths "within a factor of 2" is
> probably more accurate than "a tiny bit".
>
> You lose a factor of 2 if your machine is 2-way or non-superscalar,
> and a second factor of 2 if it's a 32-bit machine.
>
> I mention this because there are a lot of home routers and other network
> appliances running Linux on 32-bit ARM and MIPS processors.  For those,
> it's a factor of *eight*, which is a lot more than "a tiny bit".
>
> The real killer is if you don't have enough registers; SipHash performs
> horribly on i386 because it uses more state than i386 has registers.
>
> (If i386 performance is desired, you might ask Jean-Philippe for some
> rotate constants for a 32-bit variant with 64 bits of key.  Note that
> SipHash's security proof requires that key length + input length is
> strictly less than the state size, so for a 4x32-bit variant, while
> you could stretch the key length a little, you'd have a hard limit at
> 95 bits.)
>
>
> A second point, the final XOR in SipHash is either a (very minor) design
> mistake, or an opportunity for optimization, depending on how you look
> at it.  Look at the end of the function:
>
> >+      SIPROUND;
> >+      SIPROUND;
> >+      return (v0 ^ v1) ^ (v2 ^ v3);
>
> Expanding that out, you get:
> +       v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32);
> +       v2 += v3; v3 = rol64(v3, 16); v3 ^= v2;
> +       v0 += v3; v3 = rol64(v3, 21); v3 ^= v0;
> +       v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32);
> +       return v0 ^ v1 ^ v2 ^ v3;
>
> Since the final XOR includes both v0 and v3, it's undoing the "v3 ^= v0"
> two lines earlier, so the value of v0 doesn't matter after its XOR into
> v1 on line one.
>
> The final SIPROUND and return can then be optimized to
>
> +       v0 += v1; v1 = rol64(v1, 13); v1 ^= v0;
> +       v2 += v3; v3 = rol64(v3, 16); v3 ^= v2;
> +       v3 = rol64(v3, 21);
> +       v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32);
> +       return v1 ^ v2 ^ v3;
>
> A 32-bit implementation could further tweak the 4 instructions of
>         v1 ^= v2; v2 = rol64(v2, 32); v1 ^= v2;
>
> gcc 6.2.1 -O3 compiles it to basically:
>         v1.low ^= v2.low;
>         v1.high ^= v2.high;
>         v1.low ^= v2.high;
>         v1.high ^= v2.low;
> but it could be written as:
>         v2.low ^= v2.high;
>         v1.low ^= v2.low;
>         v1.high ^= v2.low;
>
> Alternatively, if it's for private use only (key not shared with other
> systems), a slightly stronger variant would "return v1 ^ v3;".
> (The final swap of v2 is dead code, but a compiler can spot that easily.)
>

[-- Attachment #2: Type: text/html, Size: 6336 bytes --]

^ permalink raw reply

* Re: [PATCH v5 1/4] siphash: add cryptographically secure PRF
From: George Spelvin @ 2016-12-15 22:42 UTC (permalink / raw)
  To: ak, davem, David.Laight, ebiggers3, hannes, Jason,
	kernel-hardening, linux-crypto, linux-kernel, linux, luto, netdev,
	tom, torvalds, tytso, vegard.nossum
  Cc: djb, jeanphilippe.aumasson
In-Reply-To: <20161215203003.31989-2-Jason@zx2c4.com>

> While SipHash is extremely fast for a cryptographically secure function,
> it is likely a tiny bit slower than the insecure jhash, and so replacements
> will be evaluated on a case-by-case basis based on whether or not the
> difference in speed is negligible and whether or not the current jhash usage
> poses a real security risk.

To quantify that, jhash is 27 instructions per 12 bytes of input, with a
dependency path length of 13 instructions.  (24/12 in __jhash_mix, plus
3/1 for adding the input to the state.) The final add + __jhash_final
is 24 instructions with a path length of 15, which is close enough for
this handwaving.  Call it 18n instructions and 8n cycles for 8n bytes.

SipHash (on a 64-bit machine) is 14 instructions with a dependency path
length of 4 *per round*.  Two rounds per 8 bytes, plus two adds
and one cycle per input word, plus four rounds to finish makes 30n+46
instructions and 9n+16 cycles for 8n bytes.

So *if* you have a 64-bit 4-way superscalar machine, it's not that much
slower once it gets going, but the four-round finalization is quite
noticeable for short inputs.

For typical kernel input lengths "within a factor of 2" is
probably more accurate than "a tiny bit".

You lose a factor of 2 if your machine is 2-way or non-superscalar,
and a second factor of 2 if it's a 32-bit machine.

I mention this because there are a lot of home routers and other network
appliances running Linux on 32-bit ARM and MIPS processors.  For those,
it's a factor of *eight*, which is a lot more than "a tiny bit".

The real killer is if you don't have enough registers; SipHash performs
horribly on i386 because it uses more state than i386 has registers.

(If i386 performance is desired, you might ask Jean-Philippe for some
rotate constants for a 32-bit variant with 64 bits of key.  Note that
SipHash's security proof requires that key length + input length is
strictly less than the state size, so for a 4x32-bit variant, while
you could stretch the key length a little, you'd have a hard limit at
95 bits.)


A second point, the final XOR in SipHash is either a (very minor) design
mistake, or an opportunity for optimization, depending on how you look
at it.  Look at the end of the function:

>+	SIPROUND;
>+	SIPROUND;
>+	return (v0 ^ v1) ^ (v2 ^ v3);

Expanding that out, you get:
+	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32);
+	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2;
+	v0 += v3; v3 = rol64(v3, 21); v3 ^= v0;
+	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32);
+	return v0 ^ v1 ^ v2 ^ v3;

Since the final XOR includes both v0 and v3, it's undoing the "v3 ^= v0"
two lines earlier, so the value of v0 doesn't matter after its XOR into
v1 on line one.

The final SIPROUND and return can then be optimized to

+	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0;
+	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2;
+	v3 = rol64(v3, 21);
+	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32);
+	return v1 ^ v2 ^ v3;

A 32-bit implementation could further tweak the 4 instructions of
	v1 ^= v2; v2 = rol64(v2, 32); v1 ^= v2;

gcc 6.2.1 -O3 compiles it to basically:
	v1.low ^= v2.low;
	v1.high ^= v2.high;
	v1.low ^= v2.high;
	v1.high ^= v2.low;
but it could be written as:
	v2.low ^= v2.high;
	v1.low ^= v2.low;
	v1.high ^= v2.low;

Alternatively, if it's for private use only (key not shared with other
systems), a slightly stronger variant would "return v1 ^ v3;".
(The final swap of v2 is dead code, but a compiler can spot that easily.)

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Hannes Frederic Sowa @ 2016-12-15 21:45 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <CAHmME9p9cf1W3vhbu=YTRY1Xt=fmE1sVqY1XPt5iQwxfCfQUOA@mail.gmail.com>

On 15.12.2016 22:25, Jason A. Donenfeld wrote:
> On Thu, Dec 15, 2016 at 10:17 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>> And I was exactly questioning this.
>>
>> static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
>>                                     const struct in6_addr *daddr)
>> {
>>         net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
>>         return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
>>                             (__force u32)id, ip6_frags.rnd);
>> }
> 
> For this example, the replacement is the function entitled siphash_4u32:
> 
>  static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
>                                      const struct in6_addr *daddr)
>  {
>          net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
>          return siphash_4u32(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
>                  (__force u32)id, 0, ip6_frags.rnd);
>  }
> 
> And then you make ip6_frags.rnd be of type siphash_key_t. Then
> everything is taken care of and works beautifully. Please see v5 of
> this patchset.

Sorry to not be specific enough, the Hash-DoS is in ipv6_addr_hash.
Maybe it was a silly example to start with, sorry. But anyway, your
proposal wouldn't have prevented the hash DoS. I wanted to show how it
can be difficult to make sure that all pointers come from an appropriate
aligned memory region.

The idea would be to actually factor out the key in the data structure
and align it with __aligned(SIPHASH_ALIGNMENT), make sure the padding
bits are all equal zero to not cause any bugs and irregularities with
the corresponding equality function. This might need some serious review
when switching to siphash to actually make use of it and prevent
HashDoS. Or simply use the unaligned version always...

>> I would be interested if the compiler can actually constant-fold the
>> address of the stack allocation with a simple if () or some
>> __builtin_constant_p fiddling, so we don't have this constant review
>> overhead to which function we pass which data. This would also make
>> this whole discussion moot.
> 
> I'll play with it to see if the compiler is capable of doing that.
> Does anybody know off hand if it is or if there are other examples of
> the compiler doing that?

Not off the top of my head, but it should be easy to test.

> In any case, for all current replacement of jhash_1word, jhash_2words,
> jhash_3words, there's the siphash_2u32 or siphash_4u32 functions. This
> covers the majority of cases.

Agreed and this is also totally fine by me.

> For replacements of md5_transform, either the data is small and can
> fit in siphash_Nu{32,64}, or it can be put into a struct explicitly
> aligned on the stack.

> For the remaining use of jhash_nwords, either siphash() can be used or
> siphash_unaligned() can be used if the source is of unknown alignment.
> Both functions have their alignment requirements (or lack thereof)
> documented in a docbook comment.

I think the warning needs to be bigger, seriously. Most of the people
develop on 64 bit arch, where it will just work during testing and break
later on 32 bit. ;)

> I'll look into the constant folding to see if it actually works. If it
> does, I'll use it. If not, I believe the current solution works.
> 
> How's that sound?

I am still very much concerned about the API.

By the way, if you target net-next, it is currently closed. So no need
to hurry.

Bye,
Hannes

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Jason A. Donenfeld @ 2016-12-15 21:25 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers

On Thu, Dec 15, 2016 at 10:17 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> And I was exactly questioning this.
>
> static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
>                                     const struct in6_addr *daddr)
> {
>         net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
>         return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
>                             (__force u32)id, ip6_frags.rnd);
> }

For this example, the replacement is the function entitled siphash_4u32:

 static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
                                     const struct in6_addr *daddr)
 {
         net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
         return siphash_4u32(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
                 (__force u32)id, 0, ip6_frags.rnd);
 }

And then you make ip6_frags.rnd be of type siphash_key_t. Then
everything is taken care of and works beautifully. Please see v5 of
this patchset.

> I would be interested if the compiler can actually constant-fold the
> address of the stack allocation with a simple if () or some
> __builtin_constant_p fiddling, so we don't have this constant review
> overhead to which function we pass which data. This would also make
> this whole discussion moot.

I'll play with it to see if the compiler is capable of doing that.
Does anybody know off hand if it is or if there are other examples of
the compiler doing that?

In any case, for all current replacement of jhash_1word, jhash_2words,
jhash_3words, there's the siphash_2u32 or siphash_4u32 functions. This
covers the majority of cases.

For replacements of md5_transform, either the data is small and can
fit in siphash_Nu{32,64}, or it can be put into a struct explicitly
aligned on the stack.

For the remaining use of jhash_nwords, either siphash() can be used or
siphash_unaligned() can be used if the source is of unknown alignment.
Both functions have their alignment requirements (or lack thereof)
documented in a docbook comment.

I'll look into the constant folding to see if it actually works. If it
does, I'll use it. If not, I believe the current solution works.

How's that sound?

Jason

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Hannes Frederic Sowa @ 2016-12-15 21:17 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <CAHmME9rDCb=2rojJba13Uew9V9qAbxv1qcJGHwEAKoahxyE9QA@mail.gmail.com>

On 15.12.2016 21:43, Jason A. Donenfeld wrote:
> On Thu, Dec 15, 2016 at 9:31 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>> ARM64 and x86-64 have memory operations that are not vector operations
>> that operate on 128 bit memory.
> 
> Fair enough. imull I guess.
> 
>> How do you know that the compiler for some architecture will not choose a
>> more optimized instruction to load a 64 bit memory value into two 32 bit
>> registers if you tell the compiler it is 8 byte aligned but it actually
>> isn't? I don't know the answer but telling the compiler some data is 8
>> byte aligned while it isn't really pretty much seems like a call for
>> trouble.
> 
> If a compiler is in the business of using special 64-bit instructions
> on 64-bit aligned data, then it is also the job of the compiler to
> align structs to 64-bits when passed __aligned(8), which is what we've
> done in this code. If the compiler were to hypothetically choose to
> ignore that and internally convert it to a __aligned(4), then it would
> only be able to do so with the knowledge that it will never use 64-bit
> aligned data instructions. But so far as I can tell, gcc always
> respects __aligned(8), which is why I use it in this patchset.
> 
> I think there might have been confusion here, because perhaps someone
> was hoping that since in6_addr is 128-bits, that the __aligned
> attribute would not be required and that the struct would just
> automatically be aligned to at least 8 bytes. But in fact, as I
> mentioned, in6_addr is actually composed of u32[4] and not u64[2], so
> it will only be aligned to 4 bytes, making the __aligned(8) necessary.
> 
> I think for the purposes of this patchset, this is a solved problem.
> There's the unaligned version of the function if you don't know about
> the data, and there's the aligned version if you're using
> __aligned(SIPHASH_ALIGNMENT) on your data. Plain and simple.

And I was exactly questioning this.

static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
				    const struct in6_addr *daddr)
{
	net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
	return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
			    (__force u32)id, ip6_frags.rnd);
}

This function had a hash DoS (and kind of still has), but it has been
mitigated by explicit checks, I hope.

So you start looking for all the pointers where ipv6 addresses could
come from and find some globally defined struct where I would need to
put the aligned(SIPHASH_ALIGNMENT) into to make this work on 32 bit
code? Otherwise just the unaligned version is safe on 32 bit code.

Who knows this? It isn't even obvious by looking at the header!

I would be interested if the compiler can actually constant-fold the
address of the stack allocation with a simple if () or some
__builtin_constant_p fiddling, so we don't have this constant review
overhead as to which function we pass which data. This would also make
this whole discussion moot.

Bye,
Hannes

^ permalink raw reply

* Re: Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Jason A. Donenfeld @ 2016-12-15 21:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: kernel-hardening@lists.openwall.com, Hannes Frederic Sowa,
	David Laight, Netdev, Jean-Philippe Aumasson, LKML,
	Linux Crypto Mailing List, Daniel J . Bernstein, Eric Biggers

On Thu, Dec 15, 2016 at 10:14 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> I think you can/should just use the natural alignment for "u64".
>
> For architectures that need 8-byte alignment, u64 will already be
> properly aligned. For architectures (like x86-32) that only need
> 4-byte alignment, you get it.

I should have added mention of that with my previous email. For the
parameters that are always a multiple of u64 -- namely, the key -- I
now do that in v5 of the patchset. So this is already done.

^ permalink raw reply

* Re: [kernel-hardening] Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Linus Torvalds @ 2016-12-15 21:14 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: kernel-hardening@lists.openwall.com, Hannes Frederic Sowa,
	David Laight, Netdev, Jean-Philippe Aumasson, LKML,
	Linux Crypto Mailing List, Daniel J . Bernstein, Eric Biggers
In-Reply-To: <CAHmME9p+xEBTKz+Wfy5Ypav4HU7H+rnA-0hLgd1sMmthDzOmvw@mail.gmail.com>

On Thu, Dec 15, 2016 at 1:11 PM, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> Indeed, I stand corrected. But in any case, the use of __aligned(8) in
> the patchset ensures that things are fixed and that we don't have this
> issue.

I think you can/should just use the natural alignment for "u64".

For architectures that need 8-byte alignment, u64 will already be
properly aligned. For architectures (like x86-32) that only need
4-byte alignment, you get it.

              Linus

^ permalink raw reply

* Re: [kernel-hardening] Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Jason A. Donenfeld @ 2016-12-15 21:11 UTC (permalink / raw)
  To: kernel-hardening
  Cc: Hannes Frederic Sowa, David Laight, Netdev,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <20161215210933.GY3207@twins.programming.kicks-ass.net>

On Thu, Dec 15, 2016 at 10:09 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, Dec 15, 2016 at 07:50:36PM +0100, Jason A. Donenfeld wrote:
>> There's no 32-bit platform
>> that will trap on a 64-bit unaligned access because there's no such
>> thing as a 64-bit access there. In short, we're fine.
>
> ARMv7 LPAE is a 32bit architecture that has 64bit load/stores IIRC.
>
> x86 has cmpxchg8b that can do 64bit things and very much wants the u64
> aligned.
>
> Also, IIRC we have a few platforms where u64 doesn't carry 8 byte
> alignment, m68k or something like that, but yes, you likely don't care.

Indeed, I stand corrected. But in any case, the use of __aligned(8) in
the patchset ensures that things are fixed and that we don't have this
issue.

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Peter Zijlstra @ 2016-12-15 21:09 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Hannes Frederic Sowa, David Laight, Netdev,
	kernel-hardening@lists.openwall.com, Jean-Philippe Aumasson, LKML,
	Linux Crypto Mailing List, Daniel J . Bernstein, Linus Torvalds,
	Eric Biggers
In-Reply-To: <CAHmME9pTLFu3-4n6m_OMj5jVWGE-+yC-4CnkynD--H4Nt8_cpA@mail.gmail.com>

On Thu, Dec 15, 2016 at 07:50:36PM +0100, Jason A. Donenfeld wrote:
> There's no 32-bit platform
> that will trap on a 64-bit unaligned access because there's no such
> thing as a 64-bit access there. In short, we're fine.

ARMv7 LPAE is a 32bit architecture that has 64bit load/stores IIRC.

x86 has cmpxchg8b that can do 64bit things and very much wants the u64
aligned.

Also, IIRC we have a few platforms where u64 doesn't carry 8 byte
alignment, m68k or something like that, but yes, you likely don't care.

Just to make life interesting...

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Hannes Frederic Sowa @ 2016-12-15 21:09 UTC (permalink / raw)
  To: Peter Zijlstra, Jason A. Donenfeld
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <20161215210425.GX3207@twins.programming.kicks-ass.net>

On 15.12.2016 22:04, Peter Zijlstra wrote:
> On Thu, Dec 15, 2016 at 09:43:04PM +0100, Jason A. Donenfeld wrote:
>> On Thu, Dec 15, 2016 at 9:31 PM, Hannes Frederic Sowa
>> <hannes@stressinduktion.org> wrote:
>>> ARM64 and x86-64 have memory operations that are not vector operations
>>> that operate on 128 bit memory.
>>
>> Fair enough. imull I guess.
> 
> imull is into rdx:rax, not memory. I suspect he's talking about
> cmpxchg16b.

Exactly and I think I saw a ll/sc 128 bit on armv8 to atomically
manipulate linked lists.

Bye,
Hannes

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Peter Zijlstra @ 2016-12-15 21:04 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Hannes Frederic Sowa, David Laight, Netdev,
	kernel-hardening@lists.openwall.com, Jean-Philippe Aumasson, LKML,
	Linux Crypto Mailing List, Daniel J . Bernstein, Linus Torvalds,
	Eric Biggers
In-Reply-To: <CAHmME9rDCb=2rojJba13Uew9V9qAbxv1qcJGHwEAKoahxyE9QA@mail.gmail.com>

On Thu, Dec 15, 2016 at 09:43:04PM +0100, Jason A. Donenfeld wrote:
> On Thu, Dec 15, 2016 at 9:31 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > ARM64 and x86-64 have memory operations that are not vector operations
> > that operate on 128 bit memory.
> 
> Fair enough. imull I guess.

imull is into rdx:rax, not memory. I suspect he's talking about
cmpxchg16b.

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Jason A. Donenfeld @ 2016-12-15 20:43 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <18d1e9d1-1e52-b9a6-de26-2f33859ec052@stressinduktion.org>

On Thu, Dec 15, 2016 at 9:31 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> ARM64 and x86-64 have memory operations that are not vector operations
> that operate on 128 bit memory.

Fair enough. imull I guess.

> How do you know that the compiler for some architecture will not choose a
> more optimized instruction to load a 64 bit memory value into two 32 bit
> registers if you tell the compiler it is 8 byte aligned but it actually
> isn't? I don't know the answer but telling the compiler some data is 8
> byte aligned while it isn't really pretty much seems like a call for
> trouble.

If a compiler is in the business of using special 64-bit instructions
on 64-bit aligned data, then it is also the job of the compiler to
align structs to 64-bits when passed __aligned(8), which is what we've
done in this code. If the compiler were to hypothetically choose to
ignore that and internally convert it to a __aligned(4), then it would
only be able to do so with the knowledge that it will never use 64-bit
aligned data instructions. But so far as I can tell, gcc always
respects __aligned(8), which is why I use it in this patchset.

I think there might have been confusion here, because perhaps someone
was hoping that since in6_addr is 128-bits, that the __aligned
attribute would not be required and that the struct would just
automatically be aligned to at least 8 bytes. But in fact, as I
mentioned, in6_addr is actually composed of u32[4] and not u64[2], so
it will only be aligned to 4 bytes, making the __aligned(8) necessary.

I think for the purposes of this patchset, this is a solved problem.
There's the unaligned version of the function if you don't know about
the data, and there's the aligned version if you're using
__aligned(SIPHASH_ALIGNMENT) on your data. Plain and simple.

Jason

^ permalink raw reply

* Re: [PATCH v2 1/4] siphash: add cryptographically secure hashtable function
From: Hannes Frederic Sowa @ 2016-12-15 20:31 UTC (permalink / raw)
  To: Jason A. Donenfeld, David Laight
  Cc: Netdev, kernel-hardening@lists.openwall.com,
	Jean-Philippe Aumasson, LKML, Linux Crypto Mailing List,
	Daniel J . Bernstein, Linus Torvalds, Eric Biggers
In-Reply-To: <CAHmME9pTLFu3-4n6m_OMj5jVWGE-+yC-4CnkynD--H4Nt8_cpA@mail.gmail.com>

Hello,

On 15.12.2016 19:50, Jason A. Donenfeld wrote:
> Hi David & Hannes,
> 
> This conversation is veering off course.

Why?

> I think this doesn't really
> matter at all. Gcc converts u64 into essentially a pair of u32 on
> 32-bit platforms, so the alignment requirements for 32-bit is at a
> maximum 32 bits. On 64-bit platforms the alignment requirements are
> related at a maximum to the biggest register size, so 64-bit
> alignment. For this reason, no matter the behavior of __aligned(8),
> we're okay. Likewise, even without __aligned(8), if gcc aligns structs
> by their biggest member, then we get 4 byte alignment on 32-bit and 8
> byte alignment on 64-bit, which is fine. There's no 32-bit platform
> that will trap on a 64-bit unaligned access because there's no such
> thing as a 64-bit access there. In short, we're fine.

ARM64 and x86-64 have memory operations that are not vector operations
that operate on 128 bit memory.

How do you know that the compiler for some architecture will not choose a
more optimized instruction to load a 64 bit memory value into two 32 bit
registers if you tell the compiler it is 8 byte aligned but it actually
isn't? I don't know the answer but telling the compiler some data is 8
byte aligned while it isn't really pretty much seems like a call for
trouble.

Why couldn't a compiler vectorize this code if it can prove that it
doesn't conflict with other register users?

Bye,
Hannes

^ permalink raw reply

* [PATCH v5 4/4] random: use SipHash in place of MD5
From: Jason A. Donenfeld @ 2016-12-15 20:30 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, Linus Torvalds, Eric Biggers,
	Tom Herbert, George Spelvin, Vegard Nossum, ak, davem, luto
  Cc: Jason A. Donenfeld, Jean-Philippe Aumasson
In-Reply-To: <20161215203003.31989-1-Jason@zx2c4.com>

This duplicates the current algorithm for get_random_int/long, but uses
siphash instead. This comes with several benefits. It's certainly
faster and more cryptographically secure than MD5. This patch also
separates hashed fields into three values instead of one, in order to
increase diffusion.

The previous MD5 algorithm used a per-cpu MD5 state, which caused
successive calls to the function to chain upon each other. While it's
not entirely clear that this kind of chaining is absolutely necessary
when using a secure PRF like siphash, it can't hurt, and the timing of
the call chain does add a degree of natural entropy. So, in keeping with
this design, instead of the massive per-cpu 64-byte MD5 state, there is
instead a per-cpu previously returned value for chaining.

The speed benefits are substantial:

                | siphash | md5    | speedup |
		------------------------------
get_random_long | 137130  | 415983 | 3.03x   |
get_random_int  | 86384   | 343323 | 3.97x   |

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Ted Tso <tytso@mit.edu>
---
 drivers/char/random.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d6876d506220..a51f0ff43f00 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -262,6 +262,7 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
+#include <linux/siphash.h>
 #include <crypto/chacha20.h>
 
 #include <asm/processor.h>
@@ -2042,7 +2043,7 @@ struct ctl_table random_table[] = {
 };
 #endif 	/* CONFIG_SYSCTL */
 
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+static siphash_key_t random_int_secret;
 
 int random_int_secret_init(void)
 {
@@ -2050,8 +2051,7 @@ int random_int_secret_init(void)
 	return 0;
 }
 
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
-		__aligned(sizeof(unsigned long));
+static DEFINE_PER_CPU(u64, get_random_int_chaining);
 
 /*
  * Get a random word for internal kernel use only. Similar to urandom but
@@ -2061,19 +2061,16 @@ static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
  */
 unsigned int get_random_int(void)
 {
-	__u32 *hash;
 	unsigned int ret;
+	u64 *chaining;
 
 	if (arch_get_random_int(&ret))
 		return ret;
 
-	hash = get_cpu_var(get_random_int_hash);
-
-	hash[0] += current->pid + jiffies + random_get_entropy();
-	md5_transform(hash, random_int_secret);
-	ret = hash[0];
-	put_cpu_var(get_random_int_hash);
-
+	chaining = &get_cpu_var(get_random_int_chaining);
+	ret = *chaining = siphash_3u64(*chaining, jiffies, random_get_entropy() +
+				       current->pid, random_int_secret);
+	put_cpu_var(get_random_int_chaining);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_int);
@@ -2083,19 +2080,16 @@ EXPORT_SYMBOL(get_random_int);
  */
 unsigned long get_random_long(void)
 {
-	__u32 *hash;
 	unsigned long ret;
+	u64 *chaining;
 
 	if (arch_get_random_long(&ret))
 		return ret;
 
-	hash = get_cpu_var(get_random_int_hash);
-
-	hash[0] += current->pid + jiffies + random_get_entropy();
-	md5_transform(hash, random_int_secret);
-	ret = *(unsigned long *)hash;
-	put_cpu_var(get_random_int_hash);
-
+	chaining = &get_cpu_var(get_random_int_chaining);
+	ret = *chaining = siphash_3u64(*chaining, jiffies, random_get_entropy() +
+				       current->pid, random_int_secret);
+	put_cpu_var(get_random_int_chaining);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_long);
-- 
2.11.0

^ permalink raw reply related

* [PATCH v5 3/4] secure_seq: use SipHash in place of MD5
From: Jason A. Donenfeld @ 2016-12-15 20:30 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, Linus Torvalds, Eric Biggers,
	Tom Herbert, George Spelvin, Vegard Nossum, ak, davem, luto
  Cc: Jason A. Donenfeld
In-Reply-To: <20161215203003.31989-1-Jason@zx2c4.com>

This gives a clear speed and security improvement. Siphash is both
faster and is more solid crypto than the aging MD5.

Rather than manually filling MD5 buffers, for IPv6, we simply create
a layout by a simple anonymous struct, for which gcc generates
rather efficient code. For IPv4, we pass the values directly to the
short input convenience functions.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: David Laight <David.Laight@aculab.com>
Cc: Tom Herbert <tom@herbertland.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 net/core/secure_seq.c | 133 ++++++++++++++++++++------------------------------
 1 file changed, 52 insertions(+), 81 deletions(-)

diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 88a8e429fc3e..c80583bf3213 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -1,3 +1,5 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
+
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/cryptohash.h>
@@ -8,14 +10,14 @@
 #include <linux/ktime.h>
 #include <linux/string.h>
 #include <linux/net.h>
-
+#include <linux/siphash.h>
 #include <net/secure_seq.h>
 
 #if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#include <linux/in6.h>
 #include <net/tcp.h>
-#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
-static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+static siphash_key_t net_secret;
 
 static __always_inline void net_secret_init(void)
 {
@@ -44,44 +46,42 @@ static u32 seq_scale(u32 seq)
 u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				 __be16 sport, __be16 dport, u32 *tsoff)
 {
-	u32 secret[MD5_MESSAGE_BYTES / 4];
-	u32 hash[MD5_DIGEST_WORDS];
-	u32 i;
-
+	const struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		__be16 sport;
+		__be16 dport;
+		u32 padding;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *(struct in6_addr *)saddr,
+		.daddr = *(struct in6_addr *)daddr,
+		.sport = sport,
+		.dport = dport
+	};
+	u64 hash;
 	net_secret_init();
-	memcpy(hash, saddr, 16);
-	for (i = 0; i < 4; i++)
-		secret[i] = net_secret[i] + (__force u32)daddr[i];
-	secret[4] = net_secret[4] +
-		(((__force u16)sport << 16) + (__force u16)dport);
-	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
-		secret[i] = net_secret[i];
-
-	md5_transform(hash, secret);
-
-	*tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
-	return seq_scale(hash[0]);
+	hash = siphash(&combined, sizeof(combined), net_secret);
+	*tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+	return seq_scale(hash);
 }
 EXPORT_SYMBOL(secure_tcpv6_sequence_number);
 
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport)
 {
-	u32 secret[MD5_MESSAGE_BYTES / 4];
-	u32 hash[MD5_DIGEST_WORDS];
-	u32 i;
-
+	const struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		__be16 dport;
+		u16 padding1;
+		u32 padding2;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *(struct in6_addr *)saddr,
+		.daddr = *(struct in6_addr *)daddr,
+		.dport = dport
+	};
 	net_secret_init();
-	memcpy(hash, saddr, 16);
-	for (i = 0; i < 4; i++)
-		secret[i] = net_secret[i] + (__force u32) daddr[i];
-	secret[4] = net_secret[4] + (__force u32)dport;
-	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
-		secret[i] = net_secret[i];
-
-	md5_transform(hash, secret);
-
-	return hash[0];
+	return siphash(&combined, sizeof(combined), net_secret);
 }
 EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
@@ -91,33 +91,17 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 			       __be16 sport, __be16 dport, u32 *tsoff)
 {
-	u32 hash[MD5_DIGEST_WORDS];
-
+	u64 hash;
 	net_secret_init();
-	hash[0] = (__force u32)saddr;
-	hash[1] = (__force u32)daddr;
-	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	*tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
-	return seq_scale(hash[0]);
+	hash = siphash_4u32(saddr, daddr, sport, dport, net_secret);
+	*tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+	return seq_scale(hash);
 }
 
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
-	u32 hash[MD5_DIGEST_WORDS];
-
 	net_secret_init();
-	hash[0] = (__force u32)saddr;
-	hash[1] = (__force u32)daddr;
-	hash[2] = (__force u32)dport ^ net_secret[14];
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	return hash[0];
+	return siphash_4u32(saddr, daddr, dport, 0, net_secret);
 }
 EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
 #endif
@@ -126,21 +110,11 @@ EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport)
 {
-	u32 hash[MD5_DIGEST_WORDS];
 	u64 seq;
-
 	net_secret_init();
-	hash[0] = (__force u32)saddr;
-	hash[1] = (__force u32)daddr;
-	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	seq = hash[0] | (((u64)hash[1]) << 32);
+	seq = siphash_4u32(saddr, daddr, sport, dport, net_secret);
 	seq += ktime_get_real_ns();
 	seq &= (1ull << 48) - 1;
-
 	return seq;
 }
 EXPORT_SYMBOL(secure_dccp_sequence_number);
@@ -149,26 +123,23 @@ EXPORT_SYMBOL(secure_dccp_sequence_number);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				  __be16 sport, __be16 dport)
 {
-	u32 secret[MD5_MESSAGE_BYTES / 4];
-	u32 hash[MD5_DIGEST_WORDS];
+	const struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		__be16 sport;
+		__be16 dport;
+		u32 padding;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *(struct in6_addr *)saddr,
+		.daddr = *(struct in6_addr *)daddr,
+		.sport = sport,
+		.dport = dport
+	};
 	u64 seq;
-	u32 i;
-
 	net_secret_init();
-	memcpy(hash, saddr, 16);
-	for (i = 0; i < 4; i++)
-		secret[i] = net_secret[i] + (__force u32)daddr[i];
-	secret[4] = net_secret[4] +
-		(((__force u16)sport << 16) + (__force u16)dport);
-	for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
-		secret[i] = net_secret[i];
-
-	md5_transform(hash, secret);
-
-	seq = hash[0] | (((u64)hash[1]) << 32);
+	seq = siphash(&combined, sizeof(combined), net_secret);
 	seq += ktime_get_real_ns();
 	seq &= (1ull << 48) - 1;
-
 	return seq;
 }
 EXPORT_SYMBOL(secure_dccpv6_sequence_number);
-- 
2.11.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox