git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ben Peart <peartben@gmail.com>
To: Kevin Willford <kewillf@microsoft.com>, git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net
Subject: Re: [PATCH 3/3] merge-recursive: change current file dir string_lists to hashmap
Date: Mon, 28 Aug 2017 19:06:08 -0400	[thread overview]
Message-ID: <a9140320-05df-d45b-19bc-e67a7a5b6eb9@gmail.com> (raw)
In-Reply-To: <20170828202829.3056-4-kewillf@microsoft.com>



On 8/28/2017 4:28 PM, Kevin Willford wrote:
> The code was using two string_lists, one for the directories and
> one for the files.  The code never checks the lists independently
> so we should be able to only use one list.  The string_list also
> is a O(log n) for lookup and insertion.  Switching this to use a
> hashmap will give O(1) which will save some time when there are
> millions of paths that will be checked.
> 

Good observation that the two string lists were never used 
independently.  Perhaps the intent was to help the O(log n) issue by 
splitting them up?  A single hashmap is much faster.  Looks good.


> Signed-off-by: Kevin Willford <kewillf@microsoft.com>
> ---
>   merge-recursive.c | 48 +++++++++++++++++++++++++++++++++++++-----------
>   merge-recursive.h |  3 +--
>   2 files changed, 38 insertions(+), 13 deletions(-)
> 
> diff --git a/merge-recursive.c b/merge-recursive.c
> index d47f40ea81..ef4fe5f09f 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -24,6 +24,26 @@
>   #include "dir.h"
>   #include "submodule.h"
>   
> +struct path_hashmap_entry {
> +	struct hashmap_entry;
> +	char path[FLEX_ARRAY];
> +};
> +
> +static unsigned int (*pathhash)(const char *path);
> +static int (*pathcmp)(const char *a, const char *b);
> +
> +static int path_hashmap_cmp(const void *cmp_data,
> +			    const void *entry,
> +			    const void *entry_or_key,
> +			    const void *keydata)
> +{
> +	const struct path_hashmap_entry *a = entry;
> +	const struct path_hashmap_entry *b = entry_or_key;
> +	const char *key = keydata;
> +
> +	return pathcmp(a->path, key ? key : b->path);
> +}
> +
>   static void flush_output(struct merge_options *o)
>   {
>   	if (o->buffer_output < 2 && o->obuf.len) {
> @@ -314,15 +334,15 @@ static int save_files_dirs(const unsigned char *sha1,
>   		struct strbuf *base, const char *path,
>   		unsigned int mode, int stage, void *context)
>   {
> +	struct path_hashmap_entry *entry;
>   	int baselen = base->len;
>   	struct merge_options *o = context;
>   
>   	strbuf_addstr(base, path);
>   
> -	if (S_ISDIR(mode))
> -		string_list_insert(&o->current_directory_set, base->buf);
> -	else
> -		string_list_insert(&o->current_file_set, base->buf);
> +	FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
> +	hashmap_entry_init(entry, pathhash(entry->path));
> +	hashmap_add(&o->current_file_dir_set, entry);
>   
>   	strbuf_setlen(base, baselen);
>   	return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
> @@ -642,6 +662,8 @@ static void add_flattened_path(struct strbuf *out, const char *s)
>   
>   static char *unique_path(struct merge_options *o, const char *path, const char *branch)
>   {
> +	struct path_hashmap_entry dummy;
> +	struct path_hashmap_entry *entry;
>   	struct strbuf newpath = STRBUF_INIT;
>   	int suffix = 0;
>   	size_t base_len;
> @@ -650,14 +672,17 @@ static char *unique_path(struct merge_options *o, const char *path, const char *
>   	add_flattened_path(&newpath, branch);
>   
>   	base_len = newpath.len;
> -	while (string_list_has_string(&o->current_file_set, newpath.buf) ||
> -	       string_list_has_string(&o->current_directory_set, newpath.buf) ||
> +	hashmap_entry_init(&dummy, pathhash(newpath.buf));
> +	while (hashmap_get(&o->current_file_dir_set, &dummy, newpath.buf) ||
>   	       (!o->call_depth && file_exists(newpath.buf))) {
>   		strbuf_setlen(&newpath, base_len);
>   		strbuf_addf(&newpath, "_%d", suffix++);
> +		hashmap_entry_init(&dummy, pathhash(newpath.buf));
>   	}
>   
> -	string_list_insert(&o->current_file_set, newpath.buf);
> +	FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
> +	hashmap_entry_init(entry, pathhash(entry->path));
> +	hashmap_add(&o->current_file_dir_set, entry);
>   	return strbuf_detach(&newpath, NULL);
>   }
>   
> @@ -1941,8 +1966,7 @@ int merge_trees(struct merge_options *o,
>   	if (unmerged_cache()) {
>   		struct string_list *entries, *re_head, *re_merge;
>   		int i;
> -		string_list_clear(&o->current_file_set, 1);
> -		string_list_clear(&o->current_directory_set, 1);
> +		hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512);
>   		get_files_dirs(o, head);
>   		get_files_dirs(o, merge);
>   
> @@ -1978,6 +2002,8 @@ int merge_trees(struct merge_options *o,
>   		string_list_clear(re_head, 0);
>   		string_list_clear(entries, 1);
>   
> +		hashmap_free(&o->current_file_dir_set, 1);
> +
>   		free(re_merge);
>   		free(re_head);
>   		free(entries);
> @@ -2179,8 +2205,8 @@ void init_merge_options(struct merge_options *o)
>   	if (o->verbosity >= 5)
>   		o->buffer_output = 0;
>   	strbuf_init(&o->obuf, 0);
> -	string_list_init(&o->current_file_set, 1);
> -	string_list_init(&o->current_directory_set, 1);
> +	pathhash = ignore_case ? strihash : strhash;
> +	pathcmp = ignore_case ? strcasecmp : strcmp;
>   	string_list_init(&o->df_conflict_file_set, 1);
>   }
>   
> diff --git a/merge-recursive.h b/merge-recursive.h
> index 735343b413..80d69d1401 100644
> --- a/merge-recursive.h
> +++ b/merge-recursive.h
> @@ -25,8 +25,7 @@ struct merge_options {
>   	int show_rename_progress;
>   	int call_depth;
>   	struct strbuf obuf;
> -	struct string_list current_file_set;
> -	struct string_list current_directory_set;
> +	struct hashmap current_file_dir_set;
>   	struct string_list df_conflict_file_set;
>   };
>   
> 

  reply	other threads:[~2017-08-28 23:06 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-28 20:28 [PATCH 0/3] merge-recursive: replace string_list with hashmap Kevin Willford
2017-08-28 20:28 ` [PATCH 1/3] merge-recursive: fix memory leak Kevin Willford
2017-08-28 22:42   ` Ben Peart
2017-08-29  8:12   ` Jeff King
2017-08-28 20:28 ` [PATCH 2/3] merge-recursive: remove return value from get_files_dirs Kevin Willford
2017-08-28 22:45   ` Ben Peart
2017-08-29  8:19     ` Jeff King
2017-08-29  8:17   ` Jeff King
2017-08-29 15:58     ` Kevin Willford
2017-08-29 16:50       ` Jeff King
2017-08-31 18:12       ` Stefan Beller
2017-08-28 20:28 ` [PATCH 3/3] merge-recursive: change current file dir string_lists to hashmap Kevin Willford
2017-08-28 23:06   ` Ben Peart [this message]
2017-08-29  8:41   ` Jeff King
2017-09-06  3:35   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a9140320-05df-d45b-19bc-e67a7a5b6eb9@gmail.com \
    --to=peartben@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=kewillf@microsoft.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).