From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org, mhagger@alum.mit.edu,
Christian Couder <christian.couder@gmail.com>
Cc: David Turner <dturner@twopensource.com>,
Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v5 2/3] path: optimize common dir checking
Date: Mon, 31 Aug 2015 22:13:10 -0400 [thread overview]
Message-ID: <1441073591-639-3-git-send-email-dturner@twopensource.com> (raw)
In-Reply-To: <1441073591-639-1-git-send-email-dturner@twopensource.com>
Instead of a linear search over common_list to check whether
a path is common, use a trie. The trie search operates on
path prefixes, and handles excludes.
Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
path.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++----
t/t0060-path-utils.sh | 1 +
2 files changed, 214 insertions(+), 14 deletions(-)
diff --git a/path.c b/path.c
index f1ffe37..c87f44a 100644
--- a/path.c
+++ b/path.c
@@ -121,25 +121,224 @@ struct common_dir common_list[] = {
{ 0, 0, 0, NULL }
};
-static void update_common_dir(struct strbuf *buf, int git_dir_len)
+/*
+ * A compressed trie. A trie node consists of zero or more characters that
+ * are common to all elements with this prefix, optionally followed by some
+ * children. If value is not NULL, the trie node is a terminal node.
+ *
+ * For example, consider the following set of strings:
+ * abc
+ * def
+ * definite
+ * definition
+ *
+ * The trie would look look like:
+ * root: len = 0, children a and d non-NULL, value = NULL.
+ * a: len = 2, contents = bc, value = (data for "abc")
+ * d: len = 2, contents = ef, children i non-NULL, value = (data for "def")
+ * i: len = 3, contents = nit, children e and i non-NULL, value = NULL
+ * e: len = 0, children all NULL, value = (data for "definite")
+ * i: len = 2, contents = on, children all NULL,
+ * value = (data for "definition")
+ */
+struct trie {
+ struct trie *children[256];
+ int len;
+ char *contents;
+ void *value;
+};
+
+static struct trie *make_trie_node(const char *key, void *value)
{
- char *base = buf->buf + git_dir_len;
- const struct common_dir *p;
+ struct trie *new_node = xcalloc(1, sizeof(*new_node));
+ new_node->len = strlen(key);
+ if (new_node->len) {
+ new_node->contents = xmalloc(new_node->len);
+ memcpy(new_node->contents, key, new_node->len);
+ }
+ new_node->value = value;
+ return new_node;
+}
- if (is_dir_file(base, "logs", "HEAD") ||
- is_dir_file(base, "info", "sparse-checkout"))
- return; /* keep this in $GIT_DIR */
- for (p = common_list; p->dirname; p++) {
- const char *path = p->dirname;
- if (p->is_dir && dir_prefix(base, path)) {
- replace_dir(buf, git_dir_len, get_git_common_dir());
- return;
+/*
+ * Add a key/value pair to a trie. The key is assumed to be \0-terminated.
+ * If there was an existing value for this key, return it.
+ */
+static void *add_to_trie(struct trie *root, const char *key, void *value)
+{
+ struct trie *child;
+ void *old;
+ int i;
+
+ if (!*key) {
+ /* we have reached the end of the key */
+ old = root->value;
+ root->value = value;
+ return old;
+ }
+
+ for (i = 0; i < root->len; i++) {
+ if (root->contents[i] == key[i])
+ continue;
+
+ /*
+ * Split this node: child will contain this node's
+ * existing children.
+ */
+ child = malloc(sizeof(*child));
+ memcpy(child->children, root->children, sizeof(root->children));
+
+ child->len = root->len - i - 1;
+ if (child->len) {
+ child->contents = xstrndup(root->contents + i + 1,
+ child->len);
}
- if (!p->is_dir && !strcmp(base, path)) {
- replace_dir(buf, git_dir_len, get_git_common_dir());
- return;
+ child->value = root->value;
+ root->value = NULL;
+ root->len = i;
+
+ memset(root->children, 0, sizeof(root->children));
+ root->children[(unsigned char)root->contents[i]] = child;
+
+ /* This is the newly-added child. */
+ root->children[(unsigned char)key[i]] =
+ make_trie_node(key + i + 1, value);
+ return NULL;
+ }
+
+ /* We have matched the entire compressed section */
+ if (key[i]) {
+ child = root->children[(unsigned char)key[root->len]];
+ if (child) {
+ return add_to_trie(child, key + root->len + 1, value);
+ } else {
+ child = make_trie_node(key + root->len + 1, value);
+ root->children[(unsigned char)key[root->len]] = child;
+ return NULL;
}
}
+
+ old = root->value;
+ root->value = value;
+ return old;
+}
+
+typedef int (*match_fn)(const char *unmatched, void *data, void *baton);
+
+/*
+ * Search a trie for some key. Find the longest /-or-\0-terminated
+ * prefix of the key for which the trie contains a value. Call fn
+ * with the unmatched portion of the key and the found value, and
+ * return its return value. If there is no such prefix, return -1.
+ *
+ * The key is partially normalized: consecutive slashes are skipped.
+ *
+ * For example, consider the trie containing only [refs,
+ * refs/worktree] (both with values).
+ *
+ * | key | unmatched | val from node | return value |
+ * |-----------------|------------|---------------|--------------|
+ * | a | not called | n/a | -1 |
+ * | refs | \0 | refs | as per fn |
+ * | refs/ | / | refs | as per fn |
+ * | refs/w | /w | refs | as per fn |
+ * | refs/worktree | \0 | refs/worktree | as per fn |
+ * | refs/worktree/ | / | refs/worktree | as per fn |
+ * | refs/worktree/a | /a | refs/worktree | as per fn |
+ * |-----------------|------------|---------------|--------------|
+ *
+ */
+static int trie_find(struct trie *root, const char *key, match_fn fn,
+ void *baton)
+{
+ int i;
+ int result;
+ struct trie *child;
+
+ if (!*key) {
+ /* we have reached the end of the key */
+ if (root->value && !root->len)
+ return fn(key, root->value, baton);
+ else
+ return -1;
+ }
+
+ for (i = 0; i < root->len; i++) {
+ /* Partial path normalization: skip consecutive slashes. */
+ if (key[i] == '/' && key[i+1] == '/') {
+ key++;
+ continue;
+ }
+ if (root->contents[i] != key[i])
+ return -1;
+ }
+
+ /* Matched the entire compressed section */
+ key += i;
+ if (!*key)
+ /* End of key */
+ return fn(key, root->value, baton);
+
+ /* Partial path normalization: skip consecutive slashes */
+ while (key[0] == '/' && key[1] == '/')
+ key++;
+
+ child = root->children[(unsigned char)*key];
+ if (child)
+ result = trie_find(child, key + 1, fn, baton);
+ else
+ result = -1;
+
+ if (result >= 0 || (*key != '/' && *key != 0))
+ return result;
+ if (root->value)
+ return fn(key, root->value, baton);
+ else
+ return -1;
+}
+
+static struct trie common_trie;
+static int common_trie_done_setup;
+
+static void init_common_trie(void)
+{
+ struct common_dir *p;
+
+ if (common_trie_done_setup)
+ return;
+
+ for (p = common_list; p->dirname; p++)
+ add_to_trie(&common_trie, p->dirname, p);
+
+ common_trie_done_setup = 1;
+}
+
+/*
+ * Helper function for update_common_dir: returns 1 if the dir
+ * prefix is common.
+ */
+static int check_common(const char *unmatched, void *value, void *baton)
+{
+ struct common_dir *dir = value;
+
+ if (!dir)
+ return 0;
+
+ if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/'))
+ return !dir->exclude;
+
+ if (!dir->is_dir && unmatched[0] == 0)
+ return !dir->exclude;
+
+ return 0;
+}
+
+static void update_common_dir(struct strbuf *buf, int git_dir_len)
+{
+ char *base = buf->buf + git_dir_len;
+ init_common_trie();
+ if (trie_find(&common_trie, base, check_common, NULL) > 0)
+ replace_dir(buf, git_dir_len, get_git_common_dir());
}
void report_linked_checkout_garbage(void)
diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh
index 93605f4..1ca49e1 100755
--- a/t/t0060-path-utils.sh
+++ b/t/t0060-path-utils.sh
@@ -271,6 +271,7 @@ test_git_path GIT_COMMON_DIR=bar objects/bar bar/objects/bar
test_git_path GIT_COMMON_DIR=bar info/exclude bar/info/exclude
test_git_path GIT_COMMON_DIR=bar info/grafts bar/info/grafts
test_git_path GIT_COMMON_DIR=bar info/sparse-checkout .git/info/sparse-checkout
+test_git_path GIT_COMMON_DIR=bar info//sparse-checkout .git/info//sparse-checkout
test_git_path GIT_COMMON_DIR=bar remotes/bar bar/remotes/bar
test_git_path GIT_COMMON_DIR=bar branches/bar bar/branches/bar
test_git_path GIT_COMMON_DIR=bar logs/refs/heads/master bar/logs/refs/heads/master
--
2.0.4.315.gad8727a-twtrsrc
next prev parent reply other threads:[~2015-09-01 2:13 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-01 2:13 (unknown), David Turner
2015-09-01 2:13 ` [PATCH v5 1/3] refs: clean up common_list David Turner
2015-09-01 2:13 ` David Turner [this message]
2015-10-05 3:00 ` [PATCH v5 2/3] path: optimize common dir checking Michael Haggerty
2015-10-05 17:22 ` Junio C Hamano
2015-10-05 20:10 ` David Turner
2015-10-05 20:36 ` Junio C Hamano
2015-10-05 20:43 ` Junio C Hamano
2015-09-01 2:13 ` [PATCH v5 3/3] refs: make refs/bisect/* per-worktree David Turner
2015-09-01 16:32 ` making refs/bisect/ per worktree (was Re: (unknown)) Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1441073591-639-3-git-send-email-dturner@twopensource.com \
--to=dturner@twopensource.com \
--cc=christian.couder@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mhagger@alum.mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.