All of lore.kernel.org
 help / color / mirror / Atom feed
From: Karsten Blees <karsten.blees@gmail.com>
To: Git List <git@vger.kernel.org>
Subject: [PATCH v1 1/4] hashmap: factor out getting an int hash code from a, SHA1
Date: Thu, 03 Jul 2014 00:20:20 +0200	[thread overview]
Message-ID: <53B485A4.20609@gmail.com> (raw)
In-Reply-To: <53B48540.5070600@gmail.com>

Copying the first bytes of a SHA1 is duplicated in six places, however,
the implications (wrong byte order on little-endian systems) is documented
only once.

Add a properly documented API for this.

Signed-off-by: Karsten Blees <blees@dcon.de>
---
 Documentation/technical/api-hashmap.txt |  9 +++++++++
 builtin/describe.c                      | 11 ++---------
 decorate.c                              |  5 +----
 diffcore-rename.c                       |  4 +---
 hashmap.h                               | 11 +++++++++++
 khash.h                                 | 11 ++---------
 object.c                                | 13 +------------
 pack-objects.c                          |  5 ++---
 8 files changed, 29 insertions(+), 40 deletions(-)

diff --git a/Documentation/technical/api-hashmap.txt b/Documentation/technical/api-hashmap.txt
index b977ae8..4689968 100644
--- a/Documentation/technical/api-hashmap.txt
+++ b/Documentation/technical/api-hashmap.txt
@@ -58,6 +58,15 @@ Functions
 +
 `strihash` and `memihash` are case insensitive versions.
 
+`unsigned int sha1hash(const unsigned char *sha1)`::
+
+	Converts a cryptographic hash (e.g. SHA-1) into an int-sized hash code
+	for use in hash tables. Cryptographic hashes are supposed to have
+	uniform distribution, so in contrast to `memhash()`, this just copies
+	the first `sizeof(int)` bytes without shuffling any bits. Note that
+	the results will be different on big-endian and little-endian
+	platforms, so they should not be stored or transferred over the net!
+
 `void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
 
 	Initializes a hashmap structure.
diff --git a/builtin/describe.c b/builtin/describe.c
index 24d740c..57e84c8 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -56,17 +56,10 @@ static int commit_name_cmp(const struct commit_name *cn1,
 	return hashcmp(cn1->peeled, peeled ? peeled : cn2->peeled);
 }
 
-static inline unsigned int hash_sha1(const unsigned char *sha1)
-{
-	unsigned int hash;
-	memcpy(&hash, sha1, sizeof(hash));
-	return hash;
-}
-
 static inline struct commit_name *find_commit_name(const unsigned char *peeled)
 {
 	struct commit_name key;
-	hashmap_entry_init(&key, hash_sha1(peeled));
+	hashmap_entry_init(&key, sha1hash(peeled));
 	return hashmap_get(&names, &key, peeled);
 }
 
@@ -114,7 +107,7 @@ static void add_to_known_names(const char *path,
 		if (!e) {
 			e = xmalloc(sizeof(struct commit_name));
 			hashcpy(e->peeled, peeled);
-			hashmap_entry_init(e, hash_sha1(peeled));
+			hashmap_entry_init(e, sha1hash(peeled));
 			hashmap_add(&names, e);
 			e->path = NULL;
 		}
diff --git a/decorate.c b/decorate.c
index 7cb5d29..b2aac90 100644
--- a/decorate.c
+++ b/decorate.c
@@ -8,10 +8,7 @@
 
 static unsigned int hash_obj(const struct object *obj, unsigned int n)
 {
-	unsigned int hash;
-
-	memcpy(&hash, obj->sha1, sizeof(unsigned int));
-	return hash % n;
+	return sha1hash(obj->sha1) % n;
 }
 
 static void *insert_decoration(struct decoration *n, const struct object *base, void *decoration)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 749a35d..6fa97d4 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -242,14 +242,12 @@ struct file_similarity {
 
 static unsigned int hash_filespec(struct diff_filespec *filespec)
 {
-	unsigned int hash;
 	if (!filespec->sha1_valid) {
 		if (diff_populate_filespec(filespec, 0))
 			return 0;
 		hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
 	}
-	memcpy(&hash, filespec->sha1, sizeof(hash));
-	return hash;
+	return sha1hash(filespec->sha1);
 }
 
 static int find_identical_files(struct hashmap *srcs,
diff --git a/hashmap.h b/hashmap.h
index a816ad4..ed5425a 100644
--- a/hashmap.h
+++ b/hashmap.h
@@ -13,6 +13,17 @@ extern unsigned int strihash(const char *buf);
 extern unsigned int memhash(const void *buf, size_t len);
 extern unsigned int memihash(const void *buf, size_t len);
 
+static inline unsigned int sha1hash(const unsigned char *sha1)
+{
+	/*
+	 * Equivalent to 'return *(int *)sha1;', but safe on platforms that
+	 * don't support unaligned reads.
+	 */
+	unsigned int hash;
+	memcpy(&hash, sha1, sizeof(hash));
+	return hash;
+}
+
 /* data structures */
 
 struct hashmap_entry {
diff --git a/khash.h b/khash.h
index 57ff603..06c7906 100644
--- a/khash.h
+++ b/khash.h
@@ -320,19 +320,12 @@ static const double __ac_HASH_UPPER = 0.77;
 		code;												\
 	} }
 
-static inline khint_t __kh_oid_hash(const unsigned char *oid)
-{
-	khint_t hash;
-	memcpy(&hash, oid, sizeof(hash));
-	return hash;
-}
-
 #define __kh_oid_cmp(a, b) (hashcmp(a, b) == 0)
 
-KHASH_INIT(sha1, const unsigned char *, void *, 1, __kh_oid_hash, __kh_oid_cmp)
+KHASH_INIT(sha1, const unsigned char *, void *, 1, sha1hash, __kh_oid_cmp)
 typedef kh_sha1_t khash_sha1;
 
-KHASH_INIT(sha1_pos, const unsigned char *, int, 1, __kh_oid_hash, __kh_oid_cmp)
+KHASH_INIT(sha1_pos, const unsigned char *, int, 1, sha1hash, __kh_oid_cmp)
 typedef kh_sha1_pos_t khash_sha1_pos;
 
 #endif /* __AC_KHASH_H */
diff --git a/object.c b/object.c
index 57a0890..73acaea 100644
--- a/object.c
+++ b/object.c
@@ -50,18 +50,7 @@ int type_from_string(const char *str)
  */
 static unsigned int hash_obj(const unsigned char *sha1, unsigned int n)
 {
-	unsigned int hash;
-
-	/*
-	 * Since the sha1 is essentially random, we just take the
-	 * required number of bits directly from the first
-	 * sizeof(unsigned int) bytes of sha1.  First we have to copy
-	 * the bytes into a properly aligned integer.  If we cared
-	 * about getting consistent results across architectures, we
-	 * would have to call ntohl() here, too.
-	 */
-	memcpy(&hash, sha1, sizeof(unsigned int));
-	return hash & (n - 1);
+	return sha1hash(sha1) & (n - 1);
 }
 
 /*
diff --git a/pack-objects.c b/pack-objects.c
index 4f36c32..9992f3e 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -7,10 +7,9 @@ static uint32_t locate_object_entry_hash(struct packing_data *pdata,
 					 const unsigned char *sha1,
 					 int *found)
 {
-	uint32_t i, hash, mask = (pdata->index_size - 1);
+	uint32_t i, mask = (pdata->index_size - 1);
 
-	memcpy(&hash, sha1, sizeof(uint32_t));
-	i = hash & mask;
+	i = sha1hash(sha1) & mask;
 
 	while (pdata->index[i] > 0) {
 		uint32_t pos = pdata->index[i] - 1;
-- 
1.9.4.msysgit.0.dirty

  reply	other threads:[~2014-07-02 22:20 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-02 22:18 [PATCH v1 0/4] hashmap improvements Karsten Blees
2014-07-02 22:20 ` Karsten Blees [this message]
2014-07-07 17:22   ` [PATCH v1 1/4] hashmap: factor out getting an int hash code from a, SHA1 Junio C Hamano
2014-07-02 22:21 ` [PATCH v1 2/4] hashmap: improve struct hashmap member documentation Karsten Blees
2014-07-02 22:22 ` [PATCH v1 3/4] hashmap: add simplified hashmap_get_from_hash() API Karsten Blees
2014-07-07 17:43   ` Junio C Hamano
2014-07-11 19:11     ` Karsten Blees
2014-07-11 22:21       ` Junio C Hamano
2014-07-02 22:22 ` [PATCH v1 4/4] hashmap: add string interning API Karsten Blees
2014-07-03  7:22   ` Matthieu Moy
2014-07-07 17:44   ` Junio C Hamano
2014-07-03  7:23 ` [PATCH v1 0/4] hashmap improvements Matthieu Moy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53B485A4.20609@gmail.com \
    --to=karsten.blees@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.