linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "George Spelvin" <linux@horizon.com>
To: linux-ext4@vger.kernel.org
Cc: linux@horizon.com, tytso@mit.edu
Subject: [PATCH v1 8/10] dirhash.c: Add siphash24()
Date: 23 Sep 2014 06:27:42 -0400	[thread overview]
Message-ID: <20140923102742.13288.qmail@ns.horizon.com> (raw)
In-Reply-To: <20140923100214.26896.qmail@ns.horizon.com>

Not hooked up to anything yet.

Signed-off-by: George Spelvin <linux@horizon.com>
---
Is there a nicer way to implement get_unaligned_le64()?
I know we all hate #ifdef, but I was tempted by the Dark Side.

 lib/ext2fs/dirhash.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/lib/ext2fs/dirhash.c b/lib/ext2fs/dirhash.c
index 048486e..f51a342 100644
--- a/lib/ext2fs/dirhash.c
+++ b/lib/ext2fs/dirhash.c
@@ -14,6 +14,7 @@
 #include "config.h"
 #include <stdio.h>
 #include <string.h>
+#include <stdint.h>
 
 #include "ext2_fs.h"
 #include "ext2fs.h"
@@ -174,6 +175,131 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num,
 		*buf++ = pad;
 }
 
+#if __i386 || __x86_64
+/* This is such a common case it's worth optimizing */
+# define get_unaligned_le64(p) (*(const __u64 *)(p))
+#else
+#if __GNUC__ >= 3
+__attribute__ ((__pure__))
+#endif
+static __u64 get_unaligned_le64(const void *p)
+{
+	const __u8 *q = p;
+
+	return	(__u64)q[0] |
+		(__u64)q[1] << 8 |
+		(__u64)q[2] << 16 |
+		(__u64)q[3] << 24 |
+		(__u64)q[4] << 32 |
+		(__u64)q[5] << 40 |
+		(__u64)q[6] << 48 |
+		(__u64)q[7] << 56;
+}
+#endif
+
+#define rol64(x, s) ((x) << (s) | (x) >> (64 - (s)))
+
+/* The basic ARX mixing function, taken from Skein */
+#define SIP_MIX(a, b, s) ((a) += (b), (b) = rol64(b, s), (b) ^= (a))
+
+/*
+ * The complete SipRound.  Note that, when unrolled twice like below,
+ * the 32-bit rotates drop out on 32-bit machines.
+ */
+#define SIP_ROUND(a, b, c, d) \
+	(SIP_MIX(a, b, 13), SIP_MIX(c, d, 16), (a) = rol64(a, 32), \
+	 SIP_MIX(c, b, 17), SIP_MIX(a, d, 21), (c) = rol64(c, 32))
+
+#if __GNUC__ >= 3
+__attribute__ ((__pure__))
+#endif
+static __u64
+siphash24(char const *in, size_t len, __u32 const seed[4])
+{
+	__u64	a = 0x736f6d6570736575ull;	/* somepseu */
+	__u64	b = 0x646f72616e646f6dull;	/* dorandom */
+	__u64	c = 0x6c7967656e657261ull;	/* lygenera */
+	__u64	d = 0x7465646279746573ull;	/* tedbytes */
+	__u64	m = 0;
+	__u8	padbyte = len;
+
+	/*
+	 * Mix in the 128-bit hash seed.  This is in a format convenient
+	 * to the ext3/ext4 code.  Please feel free to adapt the
+	 * */
+	if (seed) {
+		m = seed[2] | (__u64)seed[3] << 32;
+		b ^= m;
+		d ^= m;
+		m = seed[0] | (__u64)seed[1] << 32;
+		/* a ^= m; is done in loop below */
+		c ^= m;
+	}
+
+	/*
+	 * By using the same SipRound code for all iterations, we
+	 * save space, at the expense of some branch prediction.  But
+	 * branch prediction is hard because of variable length anyway.
+	 */
+	len = len/8 + 3;	/* Now number of rounds to perform */
+	do {
+		a ^= m;
+
+		switch (--len) {
+			unsigned bytes;
+
+		default:	/* Full words */
+			d ^= m = get_unaligned_le64(in);
+			in += 8;
+			break;
+		case 2:		/* Final partial word */
+			/*
+			 * We'd like to do one 64-bit fetch rather than
+			 * mess around with bytes, but reading past the end
+			 * might hit a protection boundary.  Fortunately,
+			 * we know that protection boundaries are aligned,
+			 * so we can consider only three cases:
+			 * - The remainder occupies zero words
+			 * - The remainder fits into one word
+			 * - The remainder straddles two words
+			 */
+			bytes = padbyte & 7;
+
+			if (bytes == 0) {
+				m = 0;
+			} else {
+				unsigned offset = (unsigned)(uintptr_t)in & 7;
+
+				if (offset + bytes <= 8) {
+					m = ext2fs_le64_to_cpu(*(__u64 const *)
+								(in - offset));
+					m >>= 8*offset;
+				} else {
+					m = get_unaligned_le64(in);
+				}
+				m &= ((__u64)1 << 8*bytes) - 1;
+			}
+			/* Could use | or +, but ^ allows associativity */
+			d ^= m ^= (__u64)padbyte << 56;
+			break;
+		case 1:		/* Beginning of finalization */
+			m = 0;
+			c ^= 0xff;
+			/*FALLTHROUGH*/
+		case 0:
+			break;
+		}
+
+		SIP_ROUND(a, b, c, d);
+		SIP_ROUND(a, b, c, d);
+	} while (len);
+
+	return a ^ b ^ c ^ d;
+}
+
+#undef SIP_ROUND
+#undef SIP_MIX
+
 /*
  * Returns the hash of a filename.  If len is 0 and name is NULL, then
  * this function can be used to test whether or not a hash version is
-- 
2.1.0


  parent reply	other threads:[~2014-09-23 10:27 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-23 10:02 [PATCH v1 0/10] Add SipHash-2-4 directory hashing support George Spelvin
2014-09-23 10:03 ` patch V1 1/10] ex4: Introduce DX_HASH_UNSIGNED_DELTA George Spelvin
2014-09-23 21:06   ` [PATCH v1.1 1/10] ext4: " George Spelvin
2014-09-23 10:05 ` [PATCH v1 2/10] ext4: Remove redundant local variable p from ext4fs_dirhash George Spelvin
2014-09-23 10:10 ` [PATCH v1 3/10] byteorder: Fix some erroneous comments George Spelvin
2014-09-23 10:14 ` [PATCH v1 4/10] lib/siphash.c: New file George Spelvin
2014-09-29 19:12   ` Darrick J. Wong
2014-12-06 23:32     ` George Spelvin
2014-12-08 13:16       ` Theodore Ts'o
2014-09-23 10:16 ` [PATCH v1 5/10] ext4: Add DX_HASH_SIPHASH24 support George Spelvin
2014-09-23 19:12   ` Andreas Dilger
2014-09-23 20:45     ` George Spelvin
2014-09-24  1:47       ` Theodore Ts'o
2014-09-24  3:08         ` George Spelvin
2014-09-24 15:35           ` Theodore Ts'o
2014-09-24 23:31             ` George Spelvin
2014-09-25  2:36               ` Theodore Ts'o
2014-09-23 10:17 ` [PATCH v1 6/10] Add EXT2_HASH_UNSIGNED instead of magic constant 3 George Spelvin
2014-09-23 10:19 ` [PATCH v1 7/10] dirhash.c (ext2fs_dirhash): Remove redundant local variable p George Spelvin
2014-09-23 10:27 ` George Spelvin [this message]
2014-09-23 10:29 ` [PATCH v1 9/10] Add EXT2_HASH_SIPHASH24 (=3) George Spelvin
2014-09-23 10:31 ` [PATCH v1 10/10] Add "hash_alg=siphash" support George Spelvin
2014-09-29 19:24   ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140923102742.13288.qmail@ns.horizon.com \
    --to=linux@horizon.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).