All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Willford <kewillf@microsoft.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net, Kevin Willford <kewillf@microsoft.com>
Subject: [PATCH 3/3] merge-recursive: change current file dir string_lists to hashmap
Date: Mon, 28 Aug 2017 14:28:29 -0600	[thread overview]
Message-ID: <20170828202829.3056-4-kewillf@microsoft.com> (raw)
In-Reply-To: <20170828202829.3056-1-kewillf@microsoft.com>

The code was using two string_lists, one for the directories and
one for the files.  The code never checks the lists independently
so we should be able to only use one list.  The string_list also
is a O(log n) for lookup and insertion.  Switching this to use a
hashmap will give O(1) which will save some time when there are
millions of paths that will be checked.

Signed-off-by: Kevin Willford <kewillf@microsoft.com>
---
 merge-recursive.c | 48 +++++++++++++++++++++++++++++++++++++-----------
 merge-recursive.h |  3 +--
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index d47f40ea81..ef4fe5f09f 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -24,6 +24,26 @@
 #include "dir.h"
 #include "submodule.h"
 
+struct path_hashmap_entry {
+	struct hashmap_entry;
+	char path[FLEX_ARRAY];
+};
+
+static unsigned int (*pathhash)(const char *path);
+static int (*pathcmp)(const char *a, const char *b);
+
+static int path_hashmap_cmp(const void *cmp_data,
+			    const void *entry,
+			    const void *entry_or_key,
+			    const void *keydata)
+{
+	const struct path_hashmap_entry *a = entry;
+	const struct path_hashmap_entry *b = entry_or_key;
+	const char *key = keydata;
+
+	return pathcmp(a->path, key ? key : b->path);
+}
+
 static void flush_output(struct merge_options *o)
 {
 	if (o->buffer_output < 2 && o->obuf.len) {
@@ -314,15 +334,15 @@ static int save_files_dirs(const unsigned char *sha1,
 		struct strbuf *base, const char *path,
 		unsigned int mode, int stage, void *context)
 {
+	struct path_hashmap_entry *entry;
 	int baselen = base->len;
 	struct merge_options *o = context;
 
 	strbuf_addstr(base, path);
 
-	if (S_ISDIR(mode))
-		string_list_insert(&o->current_directory_set, base->buf);
-	else
-		string_list_insert(&o->current_file_set, base->buf);
+	FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
+	hashmap_entry_init(entry, pathhash(entry->path));
+	hashmap_add(&o->current_file_dir_set, entry);
 
 	strbuf_setlen(base, baselen);
 	return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -642,6 +662,8 @@ static void add_flattened_path(struct strbuf *out, const char *s)
 
 static char *unique_path(struct merge_options *o, const char *path, const char *branch)
 {
+	struct path_hashmap_entry dummy;
+	struct path_hashmap_entry *entry;
 	struct strbuf newpath = STRBUF_INIT;
 	int suffix = 0;
 	size_t base_len;
@@ -650,14 +672,17 @@ static char *unique_path(struct merge_options *o, const char *path, const char *
 	add_flattened_path(&newpath, branch);
 
 	base_len = newpath.len;
-	while (string_list_has_string(&o->current_file_set, newpath.buf) ||
-	       string_list_has_string(&o->current_directory_set, newpath.buf) ||
+	hashmap_entry_init(&dummy, pathhash(newpath.buf));
+	while (hashmap_get(&o->current_file_dir_set, &dummy, newpath.buf) ||
 	       (!o->call_depth && file_exists(newpath.buf))) {
 		strbuf_setlen(&newpath, base_len);
 		strbuf_addf(&newpath, "_%d", suffix++);
+		hashmap_entry_init(&dummy, pathhash(newpath.buf));
 	}
 
-	string_list_insert(&o->current_file_set, newpath.buf);
+	FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
+	hashmap_entry_init(entry, pathhash(entry->path));
+	hashmap_add(&o->current_file_dir_set, entry);
 	return strbuf_detach(&newpath, NULL);
 }
 
@@ -1941,8 +1966,7 @@ int merge_trees(struct merge_options *o,
 	if (unmerged_cache()) {
 		struct string_list *entries, *re_head, *re_merge;
 		int i;
-		string_list_clear(&o->current_file_set, 1);
-		string_list_clear(&o->current_directory_set, 1);
+		hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512);
 		get_files_dirs(o, head);
 		get_files_dirs(o, merge);
 
@@ -1978,6 +2002,8 @@ int merge_trees(struct merge_options *o,
 		string_list_clear(re_head, 0);
 		string_list_clear(entries, 1);
 
+		hashmap_free(&o->current_file_dir_set, 1);
+
 		free(re_merge);
 		free(re_head);
 		free(entries);
@@ -2179,8 +2205,8 @@ void init_merge_options(struct merge_options *o)
 	if (o->verbosity >= 5)
 		o->buffer_output = 0;
 	strbuf_init(&o->obuf, 0);
-	string_list_init(&o->current_file_set, 1);
-	string_list_init(&o->current_directory_set, 1);
+	pathhash = ignore_case ? strihash : strhash;
+	pathcmp = ignore_case ? strcasecmp : strcmp;
 	string_list_init(&o->df_conflict_file_set, 1);
 }
 
diff --git a/merge-recursive.h b/merge-recursive.h
index 735343b413..80d69d1401 100644
--- a/merge-recursive.h
+++ b/merge-recursive.h
@@ -25,8 +25,7 @@ struct merge_options {
 	int show_rename_progress;
 	int call_depth;
 	struct strbuf obuf;
-	struct string_list current_file_set;
-	struct string_list current_directory_set;
+	struct hashmap current_file_dir_set;
 	struct string_list df_conflict_file_set;
 };
 
-- 
2.14.1.329.g6edf0add19


  parent reply	other threads:[~2017-08-28 20:29 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-28 20:28 [PATCH 0/3] merge-recursive: replace string_list with hashmap Kevin Willford
2017-08-28 20:28 ` [PATCH 1/3] merge-recursive: fix memory leak Kevin Willford
2017-08-28 22:42   ` Ben Peart
2017-08-29  8:12   ` Jeff King
2017-08-28 20:28 ` [PATCH 2/3] merge-recursive: remove return value from get_files_dirs Kevin Willford
2017-08-28 22:45   ` Ben Peart
2017-08-29  8:19     ` Jeff King
2017-08-29  8:17   ` Jeff King
2017-08-29 15:58     ` Kevin Willford
2017-08-29 16:50       ` Jeff King
2017-08-31 18:12       ` Stefan Beller
2017-08-28 20:28 ` Kevin Willford [this message]
2017-08-28 23:06   ` [PATCH 3/3] merge-recursive: change current file dir string_lists to hashmap Ben Peart
2017-08-29  8:41   ` Jeff King
2017-09-06  3:35   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170828202829.3056-4-kewillf@microsoft.com \
    --to=kewillf@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.