From: Brandon Williams <bmwill@google.com>
To: git@vger.kernel.org
Cc: Brandon Williams <bmwill@google.com>,
sbeller@google.com, gitster@pobox.com, pclouds@gmail.com
Subject: [PATCH v3 21/27] attr: use hashmap for attribute dictionary
Date: Fri, 27 Jan 2017 18:02:01 -0800 [thread overview]
Message-ID: <20170128020207.179015-22-bmwill@google.com> (raw)
In-Reply-To: <20170128020207.179015-1-bmwill@google.com>
The current implementation of the attribute dictionary uses a custom
hashtable. This modernizes the dictionary by converting it to the builtin
'hashmap' structure.
Also, in order to enable a threaded API in the future add an
accompanying mutex which must be acquired prior to accessing the
dictionary of interned attributes.
Signed-off-by: Brandon Williams <bmwill@google.com>
---
attr.c | 173 +++++++++++++++++++++++++++++++++++++++++++---------------
attr.h | 2 +
common-main.c | 3 +
3 files changed, 133 insertions(+), 45 deletions(-)
diff --git a/attr.c b/attr.c
index 9fe848f59..e008f3026 100644
--- a/attr.c
+++ b/attr.c
@@ -14,6 +14,7 @@
#include "dir.h"
#include "utf8.h"
#include "quote.h"
+#include "thread-utils.h"
const char git_attr__true[] = "(builtin)true";
const char git_attr__false[] = "\0(builtin)false";
@@ -23,28 +24,17 @@ static const char git_attr__unknown[] = "(builtin)unknown";
#define ATTR__UNSET NULL
#define ATTR__UNKNOWN git_attr__unknown
-/* This is a randomly chosen prime. */
-#define HASHSIZE 257
-
#ifndef DEBUG_ATTR
#define DEBUG_ATTR 0
#endif
-/*
- * NEEDSWORK: the global dictionary of the interned attributes
- * must stay a singleton even after we become thread-ready.
- * Access to these must be surrounded with mutex when it happens.
- */
struct git_attr {
- struct git_attr *next;
- unsigned h;
- int attr_nr;
+ int attr_nr; /* unique attribute number */
int maybe_macro;
int maybe_real;
- char name[FLEX_ARRAY];
+ char name[FLEX_ARRAY]; /* attribute name */
};
static int attr_nr;
-static struct git_attr *(git_attr_hash[HASHSIZE]);
/*
* NEEDSWORK: maybe-real, maybe-macro are not property of
@@ -63,15 +53,94 @@ const char *git_attr_name(const struct git_attr *attr)
return attr->name;
}
-static unsigned hash_name(const char *name, int namelen)
+struct attr_hashmap {
+ struct hashmap map;
+#ifndef NO_PTHREADS
+ pthread_mutex_t mutex;
+#endif
+};
+
+static inline void hashmap_lock(struct attr_hashmap *map)
+{
+#ifndef NO_PTHREADS
+ pthread_mutex_lock(&map->mutex);
+#endif
+}
+
+static inline void hashmap_unlock(struct attr_hashmap *map)
{
- unsigned val = 0, c;
+#ifndef NO_PTHREADS
+ pthread_mutex_unlock(&map->mutex);
+#endif
+}
- while (namelen--) {
- c = *name++;
- val = ((val << 7) | (val >> 22)) ^ c;
- }
- return val;
+/*
+ * The global dictionary of all interned attributes. This
+ * is a singleton object which is shared between threads.
+ * Access to this dictionary must be surrounded with a mutex.
+ */
+static struct attr_hashmap g_attr_hashmap;
+
+/* The container for objects stored in "struct attr_hashmap" */
+struct attr_hash_entry {
+ struct hashmap_entry ent; /* must be the first member! */
+ const char *key; /* the key; memory should be owned by value */
+ size_t keylen; /* length of the key */
+ void *value; /* the stored value */
+};
+
+/* attr_hashmap comparison function */
+static int attr_hash_entry_cmp(const struct attr_hash_entry *a,
+ const struct attr_hash_entry *b,
+ void *unused)
+{
+ return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
+}
+
+/* Initialize an 'attr_hashmap' object */
+static void attr_hashmap_init(struct attr_hashmap *map)
+{
+ hashmap_init(&map->map, (hashmap_cmp_fn) attr_hash_entry_cmp, 0);
+}
+
+/*
+ * Retrieve the 'value' stored in a hashmap given the provided 'key'.
+ * If there is no matching entry, return NULL.
+ */
+static void *attr_hashmap_get(struct attr_hashmap *map,
+ const char *key, size_t keylen)
+{
+ struct attr_hash_entry k;
+ struct attr_hash_entry *e;
+
+ if (!map->map.tablesize)
+ attr_hashmap_init(map);
+
+ hashmap_entry_init(&k, memhash(key, keylen));
+ k.key = key;
+ k.keylen = keylen;
+ e = hashmap_get(&map->map, &k, NULL);
+
+ return e ? e->value : NULL;
+}
+
+/* Add 'value' to a hashmap based on the provided 'key'. */
+static void attr_hashmap_add(struct attr_hashmap *map,
+ const char *key, size_t keylen,
+ void *value)
+{
+ struct attr_hash_entry *e;
+
+ if (!map->map.tablesize)
+ attr_hashmap_init(map);
+
+ e = xmalloc(sizeof(struct attr_hash_entry));
+ hashmap_entry_init(e, memhash(key, keylen));
+ e->key = key;
+ e->keylen = keylen;
+ e->value = value;
+
+ hashmap_add(&map->map, e);
}
static int attr_name_valid(const char *name, size_t namelen)
@@ -103,37 +172,44 @@ static void report_invalid_attr(const char *name, size_t len,
strbuf_release(&err);
}
-static struct git_attr *git_attr_internal(const char *name, int len)
+/*
+ * Given a 'name', lookup and return the corresponding attribute in the global
+ * dictionary. If no entry is found, create a new attribute and store it in
+ * the dictionary.
+ */
+static struct git_attr *git_attr_internal(const char *name, int namelen)
{
- unsigned hval = hash_name(name, len);
- unsigned pos = hval % HASHSIZE;
struct git_attr *a;
- for (a = git_attr_hash[pos]; a; a = a->next) {
- if (a->h == hval &&
- !memcmp(a->name, name, len) && !a->name[len])
- return a;
- }
-
- if (!attr_name_valid(name, len))
+ if (!attr_name_valid(name, namelen))
return NULL;
- FLEX_ALLOC_MEM(a, name, name, len);
- a->h = hval;
- a->next = git_attr_hash[pos];
- a->attr_nr = attr_nr++;
- a->maybe_macro = 0;
- a->maybe_real = 0;
- git_attr_hash[pos] = a;
+ hashmap_lock(&g_attr_hashmap);
+
+ a = attr_hashmap_get(&g_attr_hashmap, name, namelen);
+
+ if (!a) {
+ FLEX_ALLOC_MEM(a, name, name, namelen);
+ a->attr_nr = g_attr_hashmap.map.size;
+ a->maybe_real = 0;
+ a->maybe_macro = 0;
+
+ attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
+ assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
+
+ /*
+ * NEEDSWORK: per git_attr_check check_all_attr
+ * will be initialized a lot more lazily, not
+ * like this, and not here.
+ */
+ REALLOC_ARRAY(check_all_attr, ++attr_nr);
+ check_all_attr[a->attr_nr].attr = a;
+ check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
+ assert(a->attr_nr == (attr_nr - 1));
+ }
+
+ hashmap_unlock(&g_attr_hashmap);
- /*
- * NEEDSWORK: per git_attr_check check_all_attr
- * will be initialized a lot more lazily, not
- * like this, and not here.
- */
- REALLOC_ARRAY(check_all_attr, attr_nr);
- check_all_attr[a->attr_nr].attr = a;
- check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
return a;
}
@@ -941,3 +1017,10 @@ void git_attr_set_direction(enum git_attr_direction new, struct index_state *ist
drop_attr_stack();
use_index = istate;
}
+
+void attr_start(void)
+{
+#ifndef NO_PTHREADS
+ pthread_mutex_init(&g_attr_hashmap.mutex, NULL);
+#endif
+}
diff --git a/attr.h b/attr.h
index b2cfd8550..898e1a8c9 100644
--- a/attr.h
+++ b/attr.h
@@ -67,4 +67,6 @@ enum git_attr_direction {
};
void git_attr_set_direction(enum git_attr_direction, struct index_state *);
+extern void attr_start(void);
+
#endif /* ATTR_H */
diff --git a/common-main.c b/common-main.c
index c654f9555..6a689007e 100644
--- a/common-main.c
+++ b/common-main.c
@@ -1,5 +1,6 @@
#include "cache.h"
#include "exec_cmd.h"
+#include "attr.h"
/*
* Many parts of Git have subprograms communicate via pipe, expect the
@@ -33,6 +34,8 @@ int main(int argc, const char **argv)
git_setup_gettext();
+ attr_start();
+
git_extract_argv0_path(argv[0]);
restore_sigpipe_to_default();
--
2.11.0.483.g087da7b7c-goog
next prev parent reply other threads:[~2017-01-28 2:03 UTC|newest]
Thread overview: 111+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-01-12 23:53 [PATCH 00/27] Revamp the attribute system; another round Brandon Williams
2017-01-12 23:53 ` [PATCH 01/27] commit.c: use strchrnul() to scan for one line Brandon Williams
2017-01-12 23:53 ` [PATCH 02/27] attr.c: " Brandon Williams
2017-01-12 23:53 ` [PATCH 03/27] attr.c: update a stale comment on "struct match_attr" Brandon Williams
2017-01-12 23:53 ` [PATCH 04/27] attr.c: explain the lack of attr-name syntax check in parse_attr() Brandon Williams
2017-01-12 23:53 ` [PATCH 05/27] attr.c: complete a sentence in a comment Brandon Williams
2017-01-12 23:53 ` [PATCH 06/27] attr.c: mark where #if DEBUG ends more clearly Brandon Williams
2017-01-12 23:53 ` [PATCH 07/27] attr.c: simplify macroexpand_one() Brandon Williams
2017-01-12 23:53 ` [PATCH 08/27] attr.c: tighten constness around "git_attr" structure Brandon Williams
2017-01-12 23:53 ` [PATCH 09/27] attr.c: plug small leak in parse_attr_line() Brandon Williams
2017-01-12 23:53 ` [PATCH 10/27] attr: support quoting pathname patterns in C style Brandon Williams
2017-01-12 23:53 ` [PATCH 11/27] attr.c: add push_stack() helper Brandon Williams
2017-01-12 23:53 ` [PATCH 12/27] Documentation: fix a typo Brandon Williams
2017-01-12 23:53 ` [PATCH 13/27] attr.c: outline the future plans by heavily commenting Brandon Williams
2017-01-12 23:53 ` [PATCH 14/27] attr: rename function and struct related to checking attributes Brandon Williams
2017-01-12 23:53 ` [PATCH 15/27] attr: (re)introduce git_check_attr() and struct attr_check Brandon Williams
2017-01-12 23:53 ` [PATCH 16/27] attr: convert git_all_attrs() to use "struct attr_check" Brandon Williams
2017-01-12 23:53 ` [PATCH 17/27] attr: convert git_check_attrs() callers to use the new API Brandon Williams
2017-01-12 23:53 ` [PATCH 18/27] attr: retire git_check_attrs() API Brandon Williams
2017-01-12 23:53 ` [PATCH 19/27] attr: pass struct attr_check to collect_some_attrs Brandon Williams
2017-01-12 23:53 ` [PATCH 20/27] attr: change validity check for attribute names to use positive logic Brandon Williams
2017-01-12 23:53 ` [PATCH 21/27] attr: use hashmap for attribute dictionary Brandon Williams
2017-01-18 20:20 ` Stefan Beller
2017-01-18 20:23 ` Brandon Williams
2017-01-12 23:53 ` [PATCH 22/27] attr: eliminate global check_all_attr array Brandon Williams
2017-01-12 23:53 ` [PATCH 23/27] attr: remove maybe-real, maybe-macro from git_attr Brandon Williams
2017-01-12 23:53 ` [PATCH 24/27] attr: tighten const correctness with git_attr and match_attr Brandon Williams
2017-01-12 23:53 ` [PATCH 25/27] attr: store attribute stacks in hashmap Brandon Williams
2017-01-13 21:20 ` Junio C Hamano
2017-01-18 20:34 ` Brandon Williams
2017-01-23 18:08 ` Brandon Williams
2017-01-18 20:39 ` Stefan Beller
2017-01-18 20:45 ` Stefan Beller
2017-01-18 20:50 ` Brandon Williams
2017-01-12 23:53 ` [PATCH 26/27] attr: push the bare repo check into read_attr() Brandon Williams
2017-01-12 23:53 ` [PATCH 27/27] attr: reformat git_attr_set_direction() function Brandon Williams
2017-01-15 23:47 ` [PATCH 00/27] Revamp the attribute system; another round Junio C Hamano
2017-01-16 8:10 ` Jeff King
2017-01-23 20:34 ` [PATCH v2 " Brandon Williams
2017-01-23 20:34 ` [PATCH v2 01/27] commit.c: use strchrnul() to scan for one line Brandon Williams
2017-01-23 20:35 ` [PATCH v2 02/27] attr.c: " Brandon Williams
2017-01-23 20:35 ` [PATCH v2 03/27] attr.c: update a stale comment on "struct match_attr" Brandon Williams
2017-01-23 20:35 ` [PATCH v2 04/27] attr.c: explain the lack of attr-name syntax check in parse_attr() Brandon Williams
2017-01-23 20:35 ` [PATCH v2 05/27] attr.c: complete a sentence in a comment Brandon Williams
2017-01-23 20:35 ` [PATCH v2 06/27] attr.c: mark where #if DEBUG ends more clearly Brandon Williams
2017-01-23 20:35 ` [PATCH v2 07/27] attr.c: simplify macroexpand_one() Brandon Williams
2017-01-23 20:35 ` [PATCH v2 08/27] attr.c: tighten constness around "git_attr" structure Brandon Williams
2017-01-23 20:35 ` [PATCH v2 09/27] attr.c: plug small leak in parse_attr_line() Brandon Williams
2017-01-23 20:35 ` [PATCH v2 10/27] attr: support quoting pathname patterns in C style Brandon Williams
2017-01-23 20:35 ` [PATCH v2 11/27] attr.c: add push_stack() helper Brandon Williams
2017-01-23 20:35 ` [PATCH v2 12/27] Documentation: fix a typo Brandon Williams
2017-01-23 20:35 ` [PATCH v2 13/27] attr.c: outline the future plans by heavily commenting Brandon Williams
2017-01-23 20:35 ` [PATCH v2 14/27] attr: rename function and struct related to checking attributes Brandon Williams
2017-01-23 20:35 ` [PATCH v2 15/27] attr: (re)introduce git_check_attr() and struct attr_check Brandon Williams
2017-01-23 20:35 ` [PATCH v2 16/27] attr: convert git_all_attrs() to use "struct attr_check" Brandon Williams
2017-01-23 20:35 ` [PATCH v2 17/27] attr: convert git_check_attrs() callers to use the new API Brandon Williams
2017-01-23 20:35 ` [PATCH v2 18/27] attr: retire git_check_attrs() API Brandon Williams
2017-01-23 20:35 ` [PATCH v2 19/27] attr: pass struct attr_check to collect_some_attrs Brandon Williams
2017-01-23 20:35 ` [PATCH v2 20/27] attr: change validity check for attribute names to use positive logic Brandon Williams
2017-01-23 20:35 ` [PATCH v2 21/27] attr: use hashmap for attribute dictionary Brandon Williams
2017-01-23 20:35 ` [PATCH v2 22/27] attr: eliminate global check_all_attr array Brandon Williams
2017-01-23 21:11 ` Junio C Hamano
2017-01-23 20:35 ` [PATCH v2 23/27] attr: remove maybe-real, maybe-macro from git_attr Brandon Williams
2017-01-23 20:35 ` [PATCH v2 24/27] attr: tighten const correctness with git_attr and match_attr Brandon Williams
2017-01-23 20:35 ` [PATCH v2 25/27] attr: store attribute stack in attr_check structure Brandon Williams
2017-01-23 21:42 ` Junio C Hamano
2017-01-23 22:06 ` Brandon Williams
2017-01-24 1:11 ` Brandon Williams
2017-01-24 2:28 ` Junio C Hamano
2017-01-25 19:57 ` Brandon Williams
2017-01-25 20:10 ` Stefan Beller
2017-01-25 20:14 ` Junio C Hamano
2017-01-25 21:54 ` Brandon Williams
2017-01-25 23:19 ` Brandon Williams
2017-01-23 20:35 ` [PATCH v2 26/27] attr: push the bare repo check into read_attr() Brandon Williams
2017-01-23 20:35 ` [PATCH v2 27/27] attr: reformat git_attr_set_direction() function Brandon Williams
2017-01-28 2:01 ` [PATCH v3 00/27] Revamp the attribute system; another round Brandon Williams
2017-01-28 2:01 ` [PATCH v3 01/27] commit.c: use strchrnul() to scan for one line Brandon Williams
2017-01-28 2:01 ` [PATCH v3 02/27] attr.c: " Brandon Williams
2017-01-28 2:01 ` [PATCH v3 03/27] attr.c: update a stale comment on "struct match_attr" Brandon Williams
2017-01-28 2:01 ` [PATCH v3 04/27] attr.c: explain the lack of attr-name syntax check in parse_attr() Brandon Williams
2017-01-28 2:01 ` [PATCH v3 05/27] attr.c: complete a sentence in a comment Brandon Williams
2017-01-28 2:01 ` [PATCH v3 06/27] attr.c: mark where #if DEBUG ends more clearly Brandon Williams
2017-01-28 2:01 ` [PATCH v3 07/27] attr.c: simplify macroexpand_one() Brandon Williams
2017-01-28 2:01 ` [PATCH v3 08/27] attr.c: tighten constness around "git_attr" structure Brandon Williams
2017-01-28 2:01 ` [PATCH v3 09/27] attr.c: plug small leak in parse_attr_line() Brandon Williams
2017-01-28 2:01 ` [PATCH v3 10/27] attr: support quoting pathname patterns in C style Brandon Williams
2017-01-28 2:01 ` [PATCH v3 11/27] attr.c: add push_stack() helper Brandon Williams
2017-01-28 2:01 ` [PATCH v3 12/27] Documentation: fix a typo Brandon Williams
2017-01-28 2:01 ` [PATCH v3 13/27] attr.c: outline the future plans by heavily commenting Brandon Williams
2017-01-28 2:01 ` [PATCH v3 14/27] attr: rename function and struct related to checking attributes Brandon Williams
2017-01-28 2:01 ` [PATCH v3 15/27] attr: (re)introduce git_check_attr() and struct attr_check Brandon Williams
2017-01-30 18:05 ` Brandon Williams
2017-01-28 2:01 ` [PATCH v3 16/27] attr: convert git_all_attrs() to use "struct attr_check" Brandon Williams
2017-01-28 23:50 ` Stefan Beller
2017-01-29 2:44 ` Brandon Williams
2017-01-30 18:06 ` Brandon Williams
2017-01-28 2:01 ` [PATCH v3 17/27] attr: convert git_check_attrs() callers to use the new API Brandon Williams
2017-01-28 2:01 ` [PATCH v3 18/27] attr: retire git_check_attrs() API Brandon Williams
2017-01-28 2:01 ` [PATCH v3 19/27] attr: pass struct attr_check to collect_some_attrs Brandon Williams
2017-01-28 2:02 ` [PATCH v3 20/27] attr: change validity check for attribute names to use positive logic Brandon Williams
2017-01-28 2:02 ` Brandon Williams [this message]
2017-01-28 2:02 ` [PATCH v3 22/27] attr: eliminate global check_all_attr array Brandon Williams
2017-01-28 2:02 ` [PATCH v3 23/27] attr: remove maybe-real, maybe-macro from git_attr Brandon Williams
2017-01-28 2:02 ` [PATCH v3 24/27] attr: tighten const correctness with git_attr and match_attr Brandon Williams
2017-01-28 2:02 ` [PATCH v3 25/27] attr: store attribute stack in attr_check structure Brandon Williams
2017-01-28 2:02 ` [PATCH v3 26/27] attr: push the bare repo check into read_attr() Brandon Williams
2017-01-28 2:02 ` [PATCH v3 27/27] attr: reformat git_attr_set_direction() function Brandon Williams
2017-02-02 19:14 ` [PATCH v3 00/27] Revamp the attribute system; another round Junio C Hamano
2017-02-09 17:18 ` Brandon Williams
2017-02-09 19:31 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170128020207.179015-22-bmwill@google.com \
--to=bmwill@google.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=pclouds@gmail.com \
--cc=sbeller@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.