* [PATCH v2 1/2] diffcore-rename: support rename cache
@ 2008-11-08 11:27 Nguyễn Thái Ngọc Duy
2008-11-08 11:27 ` [PATCH v2 2/2] diffcore-rename: add config option to allow to cache renames Nguyễn Thái Ngọc Duy
0 siblings, 1 reply; 2+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2008-11-08 11:27 UTC (permalink / raw)
To: git, Junio C Hamano, Yann Dirson; +Cc: Nguyễn Thái Ngọc Duy
This patch teaches diffcore_rename() to look into
$GIT_DIR/rename-cache and make use of it to recreate diff_filepair.
With a proper cache, no entries should be left over for similarity
estimation after exact matching.
Rename caching is per commit. I don't think arbitrary tree-tree caching
is worth it.
$GIT_DIR/rename-cache fans out like $GIT_DIR/objects. Each file
corresponds to one commit. Its content consists of lines like this:
<Destination SHA-1> <SPC> <Source SHA-1> <SPC> <Score in decimal> <NL>
This can be used to:
- Make --find-copies-harder practically usable for moderate-size
repositories. The first "git show" on a linux kernel commit took 5.3
sec; with the cache in place it went down to 0.13 sec.
- Give git-svn a chance to (locally) import explicit renames from
Subversion
- People may correct rename results for better diff, if automatic
rename detection is not good enough.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
> > > Has anybody thought about interaction between that caching and pathspec
> > > limited operation?
> > >
> >
> > I didn't. But I think all out-of-pathspec diff pairs are removed
> > before it reaches diffcore_rename() so the cache has nothing to do
> > with it (except it still loads full cache for a commit).
>
>
> Well, it could be that an out-of-pathspec pair would have a better
> score than an in-pathspec one. Maybe cache recording should be turned
> off when doing pathspec limitation ?
Changes from v1:
- rebased to next to avoid conflict
- no longer generate cache if pathspec is used
diff.h | 2 +
diffcore-rename.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++-
log-tree.c | 2 +
t/t4031-rename-cache.sh | 56 ++++++++++++++++++
4 files changed, 200 insertions(+), 2 deletions(-)
create mode 100755 t/t4031-rename-cache.sh
diff --git a/diff.h b/diff.h
index 42582ed..64a1edd 100644
--- a/diff.h
+++ b/diff.h
@@ -111,6 +111,8 @@ struct diff_options {
add_remove_fn_t add_remove;
diff_format_fn_t format_callback;
void *format_callback_data;
+
+ struct commit *commit;
};
enum color_diff {
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 168a95b..598cc8d 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -5,6 +5,7 @@
#include "diff.h"
#include "diffcore.h"
#include "hash.h"
+#include "commit.h"
/* Table of rename/copy destinations */
@@ -409,13 +410,130 @@ static void record_if_better(struct diff_score m[], struct diff_score *o)
m[worst] = *o;
}
+struct cached_filepair { /* one parsed line of a per-commit rename-cache file */
+ unsigned char dst[20]; /* binary SHA-1 of the destination blob (first column of the line) */
+ unsigned char src[20]; /* binary SHA-1 of the source blob (second column); all-zero means "plain creation" */
+ int score; /* third column; copied as-is into the score of the rebuilt diff_filepair */
+};
+
+static int free_cached_filepair(void *p) /* for_each_hash() callback: release one heap-allocated cached_filepair */
+{
+ free(p);
+ return 0; /* always reports 0 to the caller */
+}
+
+static void load_rename_cache(struct diff_queue_struct *q, /* in/out: slots taken over from the cache are set to NULL */
+ struct diff_queue_struct *cacheq, /* out: pairs rebuilt from the cache, appended in q order */
+ struct diff_options *options) /* supplies ->commit (selects the cache file) and ->single_follow */
+{
+ char *sha1_hex;
+ FILE *fp;
+ struct hash_table filepair_table; /* cached entries, keyed by the leading bytes of the destination SHA-1 */
+ struct hash_table src_table; /* queued pairs with a valid "one" side, keyed by hash_filespec() */
+ struct cached_filepair *pp;
+ int i, hash;
+ static int no_cache_available = -1; /* -1: not probed yet; the directory is probed only on the first call */
+ struct stat st;
+ char *path;
+
+ if (no_cache_available == -1)
+ no_cache_available = stat(git_path("rename-cache"), &st) || !S_ISDIR(st.st_mode);
+
+ /* no rename-cache directory: leave q untouched and skip all the work below */
+ if (no_cache_available > 0)
+ return;
+
+
+ /* index every queued pair that has a valid source side, so cached sources can be looked up */
+ init_hash(&src_table);
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ if (DIFF_FILE_VALID(p->one)) {
+ unsigned int hash = hash_filespec(p->one); /* shadows the outer "hash"; NOTE(review): the lookup further down assumes this equals the leading SHA-1 bytes - confirm */
+ insert_hash(hash, p, &src_table); /* NOTE(review): return value ignored - confirm what happens when the hash is already present */
+ }
+ }
+
+ /* read this commit's cache file, if there is one, into filepair_table */
+ init_hash(&filepair_table);
+ sha1_hex = sha1_to_hex(options->commit->object.sha1);
+ path = git_path("rename-cache/%c%c/%s",sha1_hex[0], sha1_hex[1], sha1_hex+2);
+ if (stat(path, &st))
+ fp = NULL;
+ else
+ fp = fopen(path, "r");
+ if (fp) {
+ char src_sha1_hex[41], dst_sha1_hex[41];
+ struct cached_filepair p;
+
+ src_sha1_hex[40] = dst_sha1_hex[40] = '\0'; /* %40c stores exactly 40 chars and does not NUL-terminate */
+ while (fscanf(fp, "%40c %40c %d\n", dst_sha1_hex, src_sha1_hex, &p.score) == 3) {
+ if (get_sha1_hex(src_sha1_hex, p.src) ||
+ get_sha1_hex(dst_sha1_hex, p.dst))
+ break; /* get_sha1_hex() rejected a field: stop reading, keep the entries parsed so far */
+
+ pp = xmalloc(sizeof(*pp));
+ memcpy(pp, &p, sizeof(*pp));
+ memcpy(&hash, p.dst, sizeof(hash));
+ insert_hash(hash, pp, &filepair_table); /* NOTE(review): return value ignored - confirm pp is not dropped/leaked on a duplicate hash */
+ }
+ fclose(fp);
+ }
+
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ struct diff_filepair *dp, *src;
+
+ /* only creation pairs are candidates (and, when single_follow is set, only that path) */
+ if (DIFF_FILE_VALID(p->one) ||
+ !DIFF_FILE_VALID(p->two) ||
+ (options->single_follow && strcmp(options->single_follow, p->two->path)))
+ continue;
+
+ memcpy(&hash, p->two->sha1, sizeof(hash));
+ pp = lookup_hash(hash, &filepair_table);
+ if (!pp || memcmp(p->two->sha1, pp->dst, 20))
+ continue; /* no cached entry for this destination blob */
+
+ /* null source in the cache: keep p as a creation, just move it over to cacheq unchanged */
+ if (is_null_sha1(pp->src)) {
+ if (DIFF_FILE_VALID(p->one)) /* NOTE(review): cannot be true here, already filtered at the top of the loop */
+ continue;
+ diff_q(cacheq, p);
+ q->queue[i] = NULL;
+ continue;
+ }
+
+ memcpy(&hash, pp->src, sizeof(hash));
+ src = lookup_hash(hash, &src_table);
+ if (!src || memcmp(pp->src, src->one->sha1, 20))
+ continue; /* the cached source blob is not among the queued pairs */
+
+ src->one->rename_used++;
+ src->one->count++;
+ p->two->count++;
+
+ dp = diff_queue(NULL, src->one, p->two);
+ dp->renamed_pair = 1;
+ dp->score = pp->score;
+
+ diff_q(cacheq, dp);
+ q->queue[i] = NULL; /* NULL marks the slot as consumed; diffcore_rename() substitutes the cacheq entry */
+ diff_free_filepair(p);
+ }
+
+ for_each_hash(&filepair_table, free_cached_filepair);
+ free_hash(&src_table);
+ free_hash(&filepair_table);
+}
+
void diffcore_rename(struct diff_options *options)
{
int detect_rename = options->detect_rename;
int minimum_score = options->rename_score;
int rename_limit = options->rename_limit;
struct diff_queue_struct *q = &diff_queued_diff;
- struct diff_queue_struct outq;
+ struct diff_queue_struct outq, cacheq;
struct diff_score *mx;
int i, j, rename_count;
int num_create, num_src, dst_cnt;
@@ -423,8 +541,19 @@ void diffcore_rename(struct diff_options *options)
if (!minimum_score)
minimum_score = DEFAULT_RENAME_SCORE;
+ cacheq.queue = NULL;
+ cacheq.nr = cacheq.alloc = 0;
+
+ if (detect_rename && options->commit)
+ load_rename_cache(q, &cacheq, options);
+
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
+
+ /* was consumed by rename cache */
+ if (!p)
+ continue;
+
if (!DIFF_FILE_VALID(p->one)) {
if (!DIFF_FILE_VALID(p->two))
continue; /* unmerged */
@@ -563,10 +692,17 @@ void diffcore_rename(struct diff_options *options)
*/
outq.queue = NULL;
outq.nr = outq.alloc = 0;
- for (i = 0; i < q->nr; i++) {
+ for (i = j = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
struct diff_filepair *pair_to_free = NULL;
+ if (!p) {
+ if (j >= cacheq.nr)
+ die("Internal error: running out of cacheq.");
+ diff_q(&outq, cacheq.queue[j++]);
+ continue;
+ }
+
if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
/*
* Creation
@@ -635,6 +771,8 @@ void diffcore_rename(struct diff_options *options)
diff_debug_queue("done copying original", &outq);
free(q->queue);
+ if (cacheq.queue)
+ free(cacheq.queue);
*q = outq;
diff_debug_queue("done collapsing", q);
diff --git a/log-tree.c b/log-tree.c
index 5444f08..040e095 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -522,6 +522,7 @@ int log_tree_commit(struct rev_info *opt, struct commit *commit)
log.commit = commit;
log.parent = NULL;
opt->loginfo = &log;
+ opt->diffopt.commit = commit;
shown = log_tree_diff(opt, commit, &log);
if (!shown && opt->loginfo && opt->always_show_header) {
@@ -531,5 +532,6 @@ int log_tree_commit(struct rev_info *opt, struct commit *commit)
}
opt->loginfo = NULL;
maybe_flush_or_die(stdout, "stdout");
+ opt->diffopt.commit = NULL;
return shown;
}
diff --git a/t/t4031-rename-cache.sh b/t/t4031-rename-cache.sh
new file mode 100755
index 0000000..f7c53fd
--- /dev/null
+++ b/t/t4031-rename-cache.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Copyright (c) 2008 Nguyen Thai Ngoc Duy
+#
+
+test_description='Test diff rename cache'
+. ./test-lib.sh
+
+cat >expected <<EOF
+ create mode 100644 sub/c
+ copy a => sub/d (100%)
+EOF
+test_expect_success 'setup' '
+ echo a > a
+ echo b > b
+ mkdir sub
+ echo c > sub/c
+ cp a sub/d
+ A_SHA1=$(git hash-object a)
+ B_SHA1=$(git hash-object b)
+ C_SHA1=$(git hash-object sub/c)
+ D_SHA1=$(git hash-object sub/d)
+ git add a b
+ test_tick
+ git commit -m first
+ git add sub
+ git commit -m second
+ git show --pretty=oneline --summary -C -M --find-copies-harder HEAD|sed 1d > result
+ test_cmp expected result
+'
+
+cat >expected <<EOF
+ copy a => sub/c (100%)
+ copy a => sub/d (100%)
+EOF
+test_expect_success 'load rename pair cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ mkdir -p $(dirname $P)
+ echo $C_SHA1 $A_SHA1 60000 >> $P
+ git show --pretty=oneline --summary -C -M --find-copies-harder HEAD|sed 1d > result
+ test_cmp expected result
+'
+
+cat >expected <<EOF
+ copy a => sub/c (100%)
+ create mode 100644 sub/d
+EOF
+test_expect_success 'load create pair cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ mkdir -p $(dirname $P)
+ echo $D_SHA1 0000000000000000000000000000000000000000 0 >> $P
+ git show --pretty=oneline --summary -C -M --find-copies-harder HEAD|sed 1d > result
+ test_cmp expected result
+'
+
+test_done
--
1.6.0.3.892.g83538
^ permalink raw reply related [flat|nested] 2+ messages in thread* [PATCH v2 2/2] diffcore-rename: add config option to allow to cache renames
2008-11-08 11:27 [PATCH v2 1/2] diffcore-rename: support rename cache Nguyễn Thái Ngọc Duy
@ 2008-11-08 11:27 ` Nguyễn Thái Ngọc Duy
0 siblings, 0 replies; 2+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2008-11-08 11:27 UTC (permalink / raw)
To: git, Junio C Hamano, Yann Dirson; +Cc: Nguyễn Thái Ngọc Duy
If diff.cacherenames is true, then renames will be cached to
$GIT_DIR/rename-cache. By default, it will not overwrite an existing
cache. Add --refresh-rename-cache to overwrite.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
Documentation/config.txt | 5 ++++
Documentation/diff-options.txt | 5 ++++
diff.c | 12 +++++++++
diff.h | 2 +
diffcore-rename.c | 49 ++++++++++++++++++++++++++++++++++++++++
t/t4031-rename-cache.sh | 36 +++++++++++++++++++++++++++++
6 files changed, 109 insertions(+), 0 deletions(-)
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 965ed74..8a7f00e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -630,6 +630,11 @@ diff.renames::
will enable basic rename detection. If set to "copies" or
"copy", it will detect copies, as well.
+diff.cacherenames::
+ Tells git to automatically cache renames when detected. The
+ cache resides in $GIT_DIR/rename-cache, which git uses
+ if it exists.
+
fetch.unpackLimit::
If the number of objects fetched over the git native
transfer is below this
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index c62b45c..d477a40 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -102,6 +102,11 @@ endif::git-format-patch[]
Turn off rename detection, even when the configuration
file gives the default to do so.
+--refresh-rename-cache::
+ By default, when git finds an existing rename cache for a commit,
+ it will not overwrite it. This option makes git overwrite
+ an existing cache, or create a new one.
+
--check::
Warn if changes introduce trailing whitespace
or an indent that uses a space before a tab. Exits with
diff --git a/diff.c b/diff.c
index f644947..604cb12 100644
--- a/diff.c
+++ b/diff.c
@@ -26,6 +26,7 @@ int diff_use_color_default = -1;
static const char *external_diff_cmd_cfg;
int diff_auto_refresh_index = 1;
static int diff_mnemonic_prefix;
+static int diff_cache_renames;
static char diff_colors[][COLOR_MAXLEN] = {
"\033[m", /* reset */
@@ -103,6 +104,11 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
return 0;
}
+ if (!strcmp(var, "diff.cacherenames")) {
+ diff_cache_renames = git_config_bool(var, value);
+ return 0;
+ }
+
switch (userdiff_config(var, value)) {
case 0: break;
case -1: return -1;
@@ -2272,6 +2278,8 @@ int diff_setup_done(struct diff_options *options)
if (options->detect_rename && options->rename_limit < 0)
options->rename_limit = diff_rename_limit_default;
+ if (options->detect_rename && diff_cache_renames)
+ DIFF_OPT_SET(options, CACHE_RENAMES);
if (options->setup & DIFF_SETUP_USE_CACHE) {
if (!active_cache)
/* read-cache does not die even when it fails
@@ -2439,6 +2447,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
DIFF_OPT_SET(options, RELATIVE_NAME);
options->prefix = arg + 11;
}
+ else if (!strcmp(arg, "--refresh-rename-cache")) {
+ DIFF_OPT_SET(options, CACHE_RENAMES);
+ DIFF_OPT_SET(options, REFRESH_RENAME_CACHE);
+ }
/* xdiff options */
else if (!strcmp(arg, "-w") || !strcmp(arg, "--ignore-all-space"))
diff --git a/diff.h b/diff.h
index 64a1edd..0503b57 100644
--- a/diff.h
+++ b/diff.h
@@ -66,6 +66,8 @@ typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
#define DIFF_OPT_DIRSTAT_CUMULATIVE (1 << 19)
#define DIFF_OPT_DIRSTAT_BY_FILE (1 << 20)
#define DIFF_OPT_ALLOW_TEXTCONV (1 << 21)
+#define DIFF_OPT_CACHE_RENAMES (1 << 22)
+#define DIFF_OPT_REFRESH_RENAME_CACHE (1 << 23)
#define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag)
#define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag)
#define DIFF_OPT_CLR(opts, flag) ((opts)->flags &= ~DIFF_OPT_##flag)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 598cc8d..49651ea 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -527,6 +527,44 @@ static void load_rename_cache(struct diff_queue_struct *q,
free_hash(&filepair_table);
}
+static void save_rename_cache(struct diff_queue_struct *outq, /* final queue; its rename and creation pairs are written out */
+ struct diff_options *options) /* supplies ->commit (selects the cache file) and the REFRESH_RENAME_CACHE flag */
+{
+ int i;
+ FILE *fp = NULL; /* opened lazily, so no file is created when there is nothing to record */
+ struct stat st;
+
+ for (i = 0;i < outq->nr; i++) {
+ struct diff_filepair *dp = outq->queue[i];
+
+ if (!(dp->renamed_pair || /* rename pair */
+ (!DIFF_FILE_VALID(dp->one) && DIFF_FILE_VALID(dp->two)))) /* create pair */
+ continue;
+
+ if (!fp) { /* first pair worth recording: decide whether to write, then open the file */
+ char *sha1 = sha1_to_hex(options->commit->object.sha1);
+ char *path = git_path("rename-cache/%c%c/%s", sha1[0], sha1[1], sha1+2);
+
+ /* a cache file already exists: keep it unless REFRESH_RENAME_CACHE (--refresh-rename-cache) is set */
+ if (!stat(path, &st) && !DIFF_OPT_TST(options, REFRESH_RENAME_CACHE))
+ return;
+
+ safe_create_leading_directories(path);
+ fp = fopen(path, "w");
+
+ if (!fp)
+ return; /* cannot open the cache file for writing: give up silently */
+ }
+
+ fprintf(fp, "%s ", sha1_to_hex(dp->two->sha1)); /* destination SHA-1 comes first on the line */
+ fprintf(fp, "%s %d\n",
+ sha1_to_hex(DIFF_FILE_VALID(dp->one) ? dp->one->sha1 : null_sha1), /* creations are recorded with the null SHA-1 as source */
+ dp->score);
+ }
+ if (fp)
+ fclose(fp);
+}
+
void diffcore_rename(struct diff_options *options)
{
int detect_rename = options->detect_rename;
@@ -770,6 +808,17 @@ void diffcore_rename(struct diff_options *options)
}
diff_debug_queue("done copying original", &outq);
+ /*
+ * Only cache if:
+ * - Have a commit hint
+ * - diff.cacherenames is on
+ * - no pathspec limits
+ */
+ if (options->commit &&
+ DIFF_OPT_TST(options, CACHE_RENAMES) &&
+ !options->nr_paths)
+ save_rename_cache(&outq, options);
+
free(q->queue);
if (cacheq.queue)
free(cacheq.queue);
diff --git a/t/t4031-rename-cache.sh b/t/t4031-rename-cache.sh
index f7c53fd..2d3f993 100755
--- a/t/t4031-rename-cache.sh
+++ b/t/t4031-rename-cache.sh
@@ -53,4 +53,40 @@ test_expect_success 'load create pair cache' '
test_cmp expected result
'
+cat >expected <<EOF
+f2ad6c76f0115a6ba5b00456a849810e7ec0af20 0000000000000000000000000000000000000000 0
+78981922613b2afb6025042ff6bd878ac1994e85 78981922613b2afb6025042ff6bd878ac1994e85 60000
+EOF
+test_expect_success 'save rename cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ rm -r .git/rename-cache
+ git config diff.cacherenames true
+ git show --summary -C -M --find-copies-harder > /dev/null
+ test_cmp expected $P
+'
+
+test_expect_success 'do not save rename cache with limited pathspec' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ echo $P
+ rm $P
+ git config diff.cacherenames true
+ git log --summary -C -M --find-copies-harder HEAD -- sub
+ ! test -f $P
+'
+
+test_expect_success 'subsequent command does not change cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ echo corrupted > $P
+ ! test_cmp expected $P &&
+ git show --summary -C -M --find-copies-harder HEAD > /dev/null &&
+ ! test_cmp expected $P
+'
+
+test_expect_success 'overwrite cache with --refresh-rename-cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ ! test_cmp expected $P &&
+ git show --summary -C -M --find-copies-harder --refresh-rename-cache HEAD > /dev/null &&
+ test_cmp expected $P
+'
+
test_done
--
1.6.0.3.892.g83538
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-11-08 11:30 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-08 11:27 [PATCH v2 1/2] diffcore-rename: support rename cache Nguyễn Thái Ngọc Duy
2008-11-08 11:27 ` [PATCH v2 2/2] diffcore-rename: add config option to allow to cache renames Nguyễn Thái Ngọc Duy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox