From: Karsten Blees <karsten.blees@gmail.com>
To: Git List <git@vger.kernel.org>
Subject: [PATCH/RFC 4/5] diffcore-rename.c: simplify finding exact renames
Date: Wed, 11 Sep 2013 01:30:08 +0200 [thread overview]
Message-ID: <522FAB80.9020406@gmail.com> (raw)
In-Reply-To: <522FAAC4.2080601@gmail.com>
The find_exact_renames function currently only uses the hash table for
grouping, i.e.:
1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4.   split sources from destinations
5.   iterate destinations, per destination:
6.     iterate sources to find best match
This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:
1. add sources
2. iterate destinations, per destination:
3.   lookup sources matching the current destination
4.   iterate sources to find best match
This saves several iterations and file_similarity allocations for the
destinations.
Signed-off-by: Karsten Blees <blees@dcon.de>
---
diffcore-rename.c | 75 +++++++++++++++----------------------------------------
1 file changed, 20 insertions(+), 55 deletions(-)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 008a60c..82b7975 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_)
}
struct file_similarity {
- int src_dst, index;
+ int index;
struct diff_filespec *filespec;
struct file_similarity *next;
};
@@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec)
return hash;
}
-static int find_identical_files(struct file_similarity *src,
- struct file_similarity *dst,
+static int find_identical_files(struct hash_table *srcs,
+ int dst_index,
struct diff_options *options)
{
int renames = 0;
- /*
- * Walk over all the destinations ...
- */
- do {
- struct diff_filespec *target = dst->filespec;
+ struct diff_filespec *target = rename_dst[dst_index].two;
struct file_similarity *p, *best;
int i = 100, best_score = -1;
/*
- * .. to find the best source match
+ * Find the best source match for specified destination.
*/
best = NULL;
- for (p = src; p; p = p->next) {
+ for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) {
int score;
struct diff_filespec *source = p->filespec;
@@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src,
break;
}
if (best) {
- record_rename_pair(dst->index, best->index, MAX_SCORE);
+ record_rename_pair(dst_index, best->index, MAX_SCORE);
renames++;
}
- } while ((dst = dst->next) != NULL);
return renames;
}
-static void free_similarity_list(struct file_similarity *p)
+static int free_similarity_list(void *p, void *unused)
{
while (p) {
struct file_similarity *entry = p;
- p = p->next;
+ p = entry->next;
free(entry);
}
+ return 0;
}
-static int find_same_files(void *ptr, void *data)
-{
- int ret;
- struct file_similarity *p = ptr;
- struct file_similarity *src = NULL, *dst = NULL;
- struct diff_options *options = data;
-
- /* Split the hash list up into sources and destinations */
- do {
- struct file_similarity *entry = p;
- p = p->next;
- if (entry->src_dst < 0) {
- entry->next = src;
- src = entry;
- } else {
- entry->next = dst;
- dst = entry;
- }
- } while (p);
-
- /*
- * If we have both sources *and* destinations, see if
- * we can match them up
- */
- ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
-
- /* Free the hashes and return the number of renames found */
- free_similarity_list(src);
- free_similarity_list(dst);
- return ret;
-}
-
-static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
+static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec)
{
void **pos;
unsigned int hash;
struct file_similarity *entry = xmalloc(sizeof(*entry));
- entry->src_dst = src_dst;
entry->index = index;
entry->filespec = filespec;
entry->next = NULL;
@@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
*/
static int find_exact_renames(struct diff_options *options)
{
- int i;
+ int i, renames = 0;
struct hash_table file_table;
+ /* Add all sources to the hash table */
init_hash(&file_table);
- preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
+ preallocate_hash(&file_table, rename_src_nr);
for (i = 0; i < rename_src_nr; i++)
- insert_file_table(&file_table, -1, i, rename_src[i].p->one);
+ insert_file_table(&file_table, i, rename_src[i].p->one);
+ /* Walk the destinations and find best source match */
for (i = 0; i < rename_dst_nr; i++)
- insert_file_table(&file_table, 1, i, rename_dst[i].two);
+ renames += find_identical_files(&file_table, i, options);
- /* Find the renames */
- i = for_each_hash(&file_table, find_same_files, options);
+ /* Free source file_similarity chains */
+ for_each_hash(&file_table, free_similarity_list, options);
/* .. and free the hash data structure */
free_hash(&file_table);
- return i;
+ return renames;
}
#define NUM_CANDIDATE_PER_DST 4
--
1.8.4.8243.gbcbdefd
next prev parent reply other threads:[~2013-09-10 23:30 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-10 23:27 [PATCH/RFC 0/5] New hash table implementation Karsten Blees
2013-09-10 23:28 ` [PATCH/RFC 1/5] add a hashtable implementation that supports O(1) removal Karsten Blees
2013-09-11 23:56 ` Junio C Hamano
2013-09-23 9:16 ` Karsten Blees
2013-09-12 4:10 ` Junio C Hamano
2013-09-23 9:21 ` Karsten Blees
2013-09-10 23:28 ` [PATCH/RFC 2/5] buitin/describe.c: use new hash map implementation Karsten Blees
2013-09-10 23:29 ` [PATCH/RFC 3/5] diffcore-rename.c: move code around to prepare for the next patch Karsten Blees
2013-09-10 23:30 ` Karsten Blees [this message]
2013-09-10 23:30 ` [PATCH/RFC 5/5] diffcore-rename.c: use new hash map implementation Karsten Blees
2013-09-24 9:50 ` [PATCH v2 0/5] New hash table implementation Karsten Blees
2013-09-24 9:51 ` [PATCH v2 1/5] add a hashtable implementation that supports O(1) removal Karsten Blees
2013-09-24 9:52 ` [PATCH v2 2/5] buitin/describe.c: use new hash map implementation Karsten Blees
2013-09-24 9:52 ` [PATCH v2 3/5] diffcore-rename.c: move code around to prepare for the next patch Karsten Blees
2013-09-24 9:53 ` [PATCH v2 4/5] diffcore-rename.c: simplify finding exact renames Karsten Blees
2013-09-24 9:54 ` [PATCH v2 5/5] diffcore-rename.c: use new hash map implementation Karsten Blees
2013-09-24 10:18 ` [PATCH v2 0/5] New hash table implementation Fredrik Gustafsson
2013-09-24 11:16 ` Tay Ray Chuan
2013-09-26 14:38 ` Karsten Blees
2013-09-26 10:16 ` Fredrik Gustafsson
2013-09-26 10:26 ` Duy Nguyen
2013-09-26 11:08 ` Fredrik Gustafsson
2013-09-26 11:14 ` Duy Nguyen
2013-09-26 13:55 ` Karsten Blees
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=522FAB80.9020406@gmail.com \
--to=karsten.blees@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).