From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org, mhagger@alum.mit.edu,
chriscool@tuxfamily.org, pclouds@gmail.com
Cc: David Turner <dturner@twopensource.com>
Subject: [PATCH v3 2/4] path: optimize common dir checking
Date: Wed, 12 Aug 2015 17:57:23 -0400 [thread overview]
Message-ID: <1439416645-19173-2-git-send-email-dturner@twopensource.com> (raw)
In-Reply-To: <1439416645-19173-1-git-send-email-dturner@twopensource.com>
Instead of a linear search over common_list to check whether
a path is common, use a trie. The trie search operates on
path prefixes, and handles excludes.
Signed-off-by: David Turner <dturner@twopensource.com>
---
Probably overkill, but maybe we could later use it for making exclude
or sparse-checkout matching faster (or maybe we have to go all the way
to McNaughton-Yamada for that to be truly worthwhile).
---
path.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 201 insertions(+), 14 deletions(-)
diff --git a/path.c b/path.c
index 236f797..21a4ce7 100644
--- a/path.c
+++ b/path.c
@@ -121,25 +121,212 @@ struct common_dir common_list[] = {
{ NULL, 0, 0, 0 }
};
-static void update_common_dir(struct strbuf *buf, int git_dir_len)
+/*
+ * A compressed trie. A trie node consists of zero or more characters that
+ * are common to all elements with this prefix, optionally followed by some
+ * children. If value is not NULL, the trie node is a terminal node.
+ *
+ * For example, consider the following set of strings:
+ * abc
+ * def
+ * definite
+ * definition
+ *
+ * The trie would look look like:
+ * root: len = 0, value = (something), children a and d non-NULL.
+ * a: len = 2, contents = bc
+ * d: len = 2, contents = ef, children i non-NULL, value = (something)
+ * i: len = 3, contents = nit, children e and i non-NULL, value = NULL
+ * e: len = 0, children all NULL, value = (something)
+ * i: len = 2, contents = on, children all NULL, value = (something)
+ */
+struct trie {
+ struct trie *children[256];
+ int len;
+ char *contents;
+ void *value;
+};
+
+static struct trie *make_trie_node(const char *key, void *value)
{
- char *base = buf->buf + git_dir_len;
- const struct common_dir *p;
+ struct trie *new_node = xcalloc(1, sizeof(*new_node));
+ new_node->len = strlen(key);
+ if (new_node->len) {
+ new_node->contents = xmalloc(new_node->len);
+ memcpy(new_node->contents, key, new_node->len);
+ }
+ new_node->value = value;
+ return new_node;
+}
- if (is_dir_file(base, "logs", "HEAD") ||
- is_dir_file(base, "info", "sparse-checkout"))
- return; /* keep this in $GIT_DIR */
- for (p = common_list; p->dirname; p++) {
- const char *path = p->dirname;
- if (p->is_dir && dir_prefix(base, path)) {
- replace_dir(buf, git_dir_len, get_git_common_dir());
- return;
+/*
+ * Add a key/value pair to a trie. The key is assumed to be \0-terminated.
+ * If there was an existing value for this key, return it.
+ */
+static void *add_to_trie(struct trie *root, const char *key, void *value)
+{
+ struct trie *child;
+ void *old;
+ int i;
+
+ if (!*key) {
+ /* we have reached the end of the key */
+ old = root->value;
+ root->value = value;
+ return old;
+ }
+
+ for (i = 0; i < root->len; ++i) {
+ if (root->contents[i] == key[i])
+ continue;
+
+ /*
+ * Split this node: child will contain this node's
+ * existing children.
+ */
+ child = malloc(sizeof(*child));
+ memcpy(child->children, root->children, sizeof(root->children));
+
+ child->len = root->len - i - 1;
+ if (child->len) {
+ child->contents = strndup(root->contents + i + 1,
+ child->len);
}
- if (!p->is_dir && !strcmp(base, path)) {
- replace_dir(buf, git_dir_len, get_git_common_dir());
- return;
+ child->value = root->value;
+ root->value = NULL;
+ root->len = i;
+
+ memset(root->children, 0, sizeof(root->children));
+ root->children[(unsigned char)root->contents[i]] = child;
+
+ /* This is the newly-added child. */
+ root->children[(unsigned char)key[i]] =
+ make_trie_node(key + i + 1, value);
+ return NULL;
+ }
+
+ /* We have matched the entire compressed section */
+ if (key[i]) {
+ child = root->children[(unsigned char)key[root->len]];
+ if (child) {
+ return add_to_trie(child, key + root->len + 1, value);
+ } else {
+ child = make_trie_node(key + root->len + 1, value);
+ root->children[(unsigned char)key[root->len]] = child;
+ return NULL;
}
}
+
+ old = root->value;
+ root->value = value;
+ return old;
+}
+
+typedef int (*match_fn)(const char *unmatched, void *data, void *baton);
+
+/*
+ * Search a trie for some key. Find the longest /-or-\0-terminated
+ * prefix of the key for which the trie contains a value. Call fn
+ * with the unmatched portion of the key and the found value, and
+ * return its return value. If there is no such prefix, return -1.
+ *
+ * For example, consider the trie containing only [refs,
+ * refs/worktree] (both with values).
+ *
+ * | key | unmatched | val from node | return value |
+ * |-----------------|------------|---------------|--------------|
+ * | a | not called | n/a | -1 |
+ * | refs | \0 | refs | as per fn |
+ * | refs/ | / | refs | as per fn |
+ * | refs/w | /w | refs | as per fn |
+ * | refs/worktree | \0 | refs/worktree | as per fn |
+ * | refs/worktree/ | / | refs/worktree | as per fn |
+ * | refs/worktree/a | /a | refs/worktree | as per fn |
+ * |-----------------|------------|---------------|--------------|
+ *
+ */
+static int trie_find(struct trie *root, const char *key, match_fn fn,
+ void *baton)
+{
+ int i;
+ int result;
+ struct trie *child;
+
+ if (!*key) {
+ /* we have reached the end of the key */
+ if (root->value && !root->len)
+ return fn(key, root->value, baton);
+ else
+ return -1;
+ }
+
+ for (i = 0; i < root->len; ++i) {
+ if (root->contents[i] != key[i])
+ return -1;
+ }
+
+ /* Matched the entire compressed section */
+ key += i;
+ if (!*key)
+ /* End of key */
+ return fn(key, root->value, baton);
+
+ child = root->children[(unsigned char)*key];
+ if (child)
+ result = trie_find(child, key + 1, fn, baton);
+ else
+ result = -1;
+
+ if (result >= 0 || (*key != '/' && *key != 0))
+ return result;
+ if (root->value)
+ return fn(key, root->value, baton);
+ else
+ return -1;
+}
+
+static struct trie common_trie;
+static int common_trie_done_setup;
+
+static void init_common_trie(void)
+{
+ struct common_dir *p;
+
+ if (common_trie_done_setup)
+ return;
+
+ for (p = common_list; p->dirname; p++)
+ add_to_trie(&common_trie, p->dirname, p);
+
+ common_trie_done_setup = 1;
+}
+
+/*
+ * Helper function for update_common_dir: returns 1 if the dir
+ * prefix is common.
+ */
+static int check_common(const char *unmatched, void *value, void *baton)
+{
+ struct common_dir *dir = value;
+
+ if (!dir)
+ return 0;
+
+ if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/'))
+ return !dir->exclude;
+
+ if (!dir->is_dir && unmatched[0] == 0)
+ return !dir->exclude;
+
+ return 0;
+}
+
+static void update_common_dir(struct strbuf *buf, int git_dir_len)
+{
+ char *base = buf->buf + git_dir_len;
+ init_common_trie();
+ if (trie_find(&common_trie, base, check_common, NULL) > 0)
+ replace_dir(buf, git_dir_len, get_git_common_dir());
}
void report_linked_checkout_garbage(void)
--
2.0.4.315.gad8727a-twtrsrc
next prev parent reply other threads:[~2015-08-12 21:58 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-12 21:57 [PATCH v3 1/4] refs: clean up common_list David Turner
2015-08-12 21:57 ` David Turner [this message]
2015-08-12 22:48 ` [PATCH v3 2/4] path: optimize common dir checking Junio C Hamano
2015-08-13 9:05 ` Michael Haggerty
2015-08-14 17:04 ` Junio C Hamano
2015-08-14 20:04 ` David Turner
2015-08-14 20:27 ` Junio C Hamano
2015-08-14 20:54 ` David Turner
2015-08-15 18:20 ` Michael Haggerty
2015-08-15 18:12 ` Michael Haggerty
2015-08-17 15:55 ` Junio C Hamano
2015-08-15 7:59 ` Duy Nguyen
2015-08-16 5:04 ` David Turner
2015-08-16 12:20 ` Duy Nguyen
2015-08-12 21:57 ` [PATCH v3 3/4] refs: make refs/worktree/* per-worktree David Turner
2015-08-13 17:15 ` Eric Sunshine
2015-08-13 17:41 ` David Turner
2015-08-13 20:16 ` Michael Haggerty
2015-08-13 20:32 ` David Turner
2015-08-14 8:18 ` Michael Haggerty
2015-08-14 17:10 ` Junio C Hamano
2015-08-15 8:04 ` Duy Nguyen
2015-08-12 21:57 ` [PATCH v3 4/4] bisect: make bisection refs per-worktree David Turner
2015-08-15 7:44 ` [PATCH v3 1/4] refs: clean up common_list Duy Nguyen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1439416645-19173-2-git-send-email-dturner@twopensource.com \
--to=dturner@twopensource.com \
--cc=chriscool@tuxfamily.org \
--cc=git@vger.kernel.org \
--cc=mhagger@alum.mit.edu \
--cc=pclouds@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).