* [PATCH v2 1/2] diffcore-rename: support rename cache
@ 2008-11-08 11:27 Nguyễn Thái Ngọc Duy
2008-11-08 11:27 ` [PATCH v2 2/2] diffcore-rename: add config option to allow to cache renames Nguyễn Thái Ngọc Duy
0 siblings, 1 reply; 2+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2008-11-08 11:27 UTC (permalink / raw)
To: git, Junio C Hamano, Yann Dirson; +Cc: Nguyễn Thái Ngọc Duy
This patch teaches diffcore_rename() to look into
$GIT_DIR/rename-cache and make use of it to recreate diff_filepair.
With proper cache, there should be no available entry for estimation
after exact matching.
Rename caching is per commit. I don't think arbitrary tree-tree caching
is worth it.
$GIT_DIR/rename-cache fans out like $GIT_DIR/objects. Each file
corresponds to one commit. Its content consists of lines like this
<Destination SHA-1> <SPC> <Source SHA-1> <SPC> <Score in decimal> <NL>
This can be used to:
- Make --find-copies-harder practically usable for moderate-size
repositories. The first "git show" on a linux kernel commit was 5.3
sec, it then went down to 0.13 sec.
- Give git-svn a chance to (locally) import explicit renames from
Subversion
- People may correct rename results for better diff, if automatic
rename detection is not good enough.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
> > > Has anybody thought about interaction between that caching and pathspec
> > > limited operation?
> > >
> >
> > I didn't. But I think all out-of-pathspec diff pairs are removed
> > before it reaches diffcore_rename() so the cache has nothing to do
> > with it (except it still loads full cache for a commit).
>
>
> Well, it could be that an out-of-pathspec pair would have a better
> score than an in-pathspec one. Maybe cache recording should be turned
> off when doing pathspec limitation ?
Changes from v1:
- rebased to next to avoid conflict
- no longer generate cache if pathspec is used
diff.h | 2 +
diffcore-rename.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++-
log-tree.c | 2 +
t/t4031-rename-cache.sh | 56 ++++++++++++++++++
4 files changed, 200 insertions(+), 2 deletions(-)
create mode 100755 t/t4031-rename-cache.sh
diff --git a/diff.h b/diff.h
index 42582ed..64a1edd 100644
--- a/diff.h
+++ b/diff.h
@@ -111,6 +111,8 @@ struct diff_options {
add_remove_fn_t add_remove;
diff_format_fn_t format_callback;
void *format_callback_data;
+
+ struct commit *commit;
};
enum color_diff {
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 168a95b..598cc8d 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -5,6 +5,7 @@
#include "diff.h"
#include "diffcore.h"
#include "hash.h"
+#include "commit.h"
/* Table of rename/copy destinations */
@@ -409,13 +410,130 @@ static void record_if_better(struct diff_score m[], struct diff_score *o)
m[worst] = *o;
}
+struct cached_filepair { /* one entry parsed from a per-commit rename-cache file */
+ unsigned char dst[20]; /* binary SHA-1 of the destination, decoded from the first hex field */
+ unsigned char src[20]; /* binary SHA-1 of the source; the all-zero SHA-1 marks a plain creation */
+ int score; /* score as read from the decimal third field */
+};
+
+static int free_cached_filepair(void *p)
+{ /* for_each_hash() callback: releases one heap-allocated cached_filepair */
+ free(p);
+ return 0; /* NOTE(review): meaning of the return value is defined by for_each_hash() — confirm in hash.c */
+}
+
+static void load_rename_cache(struct diff_queue_struct *q, /* in/out: entries satisfied by the cache are set to NULL */
+ struct diff_queue_struct *cacheq, /* out: receives the pairs taken from the cache, in q order */
+ struct diff_options *options) /* supplies ->commit and ->single_follow */
+{ /* Recreate filepairs for options->commit from its file under $GIT_DIR/rename-cache, if any. */
+ char *sha1_hex;
+ FILE *fp;
+ struct hash_table filepair_table;
+ struct hash_table src_table;
+ struct cached_filepair *pp;
+ int i, hash;
+ static int no_cache_available = -1;
+ struct stat st;
+ char *path;
+ /* The cache directory is probed on the first call only; the answer is kept in the static flag. */
+ if (no_cache_available == -1)
+ no_cache_available = stat(git_path("rename-cache"), &st) || !S_ISDIR(st.st_mode);
+
+ /* return soon so we don't need to waste CPU */
+ if (no_cache_available > 0)
+ return;
+
+
+ /* src_table initialization: index every pair of q that has a valid p->one */
+ init_hash(&src_table);
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ if (DIFF_FILE_VALID(p->one)) {
+ unsigned int hash = hash_filespec(p->one);
+ insert_hash(hash, p, &src_table);
+ }
+ }
+
+ /* filepair_table initialization: load the entries stored for this commit */
+ init_hash(&filepair_table);
+ sha1_hex = sha1_to_hex(options->commit->object.sha1); /* the visible caller checks options->commit first */
+ path = git_path("rename-cache/%c%c/%s",sha1_hex[0], sha1_hex[1], sha1_hex+2);
+ if (stat(path, &st))
+ fp = NULL;
+ else
+ fp = fopen(path, "r");
+ if (fp) {
+ char src_sha1_hex[41], dst_sha1_hex[41];
+ struct cached_filepair p;
+ /* Each line: 40 hex chars (destination), 40 hex chars (source), decimal score. */
+ src_sha1_hex[40] = dst_sha1_hex[40] = '\0';
+ while (fscanf(fp, "%40c %40c %d\n", dst_sha1_hex, src_sha1_hex, &p.score) == 3) {
+ if (get_sha1_hex(src_sha1_hex, p.src) ||
+ get_sha1_hex(dst_sha1_hex, p.dst))
+ break; /* stop at the first line that does not decode */
+ /* NOTE(review): insert_hash()'s result is ignored below; if it declines a colliding key, pp would leak — confirm. */
+ pp = xmalloc(sizeof(*pp));
+ memcpy(pp, &p, sizeof(*pp));
+ memcpy(&hash, p.dst, sizeof(hash)); /* table key: leading sizeof(int) bytes of the destination SHA-1 */
+ insert_hash(hash, pp, &filepair_table);
+ }
+ fclose(fp);
+ }
+ /* Try to satisfy each creation entry of q from the loaded cache. */
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ struct diff_filepair *dp, *src;
+
+ /* find remote_dst */
+ if (DIFF_FILE_VALID(p->one) ||
+ !DIFF_FILE_VALID(p->two) ||
+ (options->single_follow && strcmp(options->single_follow, p->two->path)))
+ continue;
+ /* Look up by the SHA-1 prefix, then confirm with a comparison of all 20 bytes. */
+ memcpy(&hash, p->two->sha1, sizeof(hash));
+ pp = lookup_hash(hash, &filepair_table);
+ if (!pp || memcmp(p->two->sha1, pp->dst, 20))
+ continue;
+
+ /* create pair: an all-zero source means the entry is kept as a creation */
+ if (is_null_sha1(pp->src)) {
+ if (DIFF_FILE_VALID(p->one)) /* cannot be true here: such entries were skipped at the top of the loop */
+ continue;
+ diff_q(cacheq, p);
+ q->queue[i] = NULL;
+ continue;
+ }
+ /* NOTE(review): src_table was keyed with hash_filespec(); this lookup assumes that equals the SHA-1 prefix — confirm. */
+ memcpy(&hash, pp->src, sizeof(hash));
+ src = lookup_hash(hash, &src_table);
+ if (!src || memcmp(pp->src, src->one->sha1, 20))
+ continue;
+ /* Bump rename_used and the count fields of both sides before pairing them. */
+ src->one->rename_used++;
+ src->one->count++;
+ p->two->count++;
+ /* dp is created with a NULL queue argument and appended to cacheq explicitly below. */
+ dp = diff_queue(NULL, src->one, p->two);
+ dp->renamed_pair = 1;
+ dp->score = pp->score;
+
+ diff_q(cacheq, dp);
+ q->queue[i] = NULL;
+ diff_free_filepair(p);
+ }
+ /* Release the cached_filepair entries, then both tables. */
+ for_each_hash(&filepair_table, free_cached_filepair);
+ free_hash(&src_table);
+ free_hash(&filepair_table);
+}
+
void diffcore_rename(struct diff_options *options)
{
int detect_rename = options->detect_rename;
int minimum_score = options->rename_score;
int rename_limit = options->rename_limit;
struct diff_queue_struct *q = &diff_queued_diff;
- struct diff_queue_struct outq;
+ struct diff_queue_struct outq, cacheq;
struct diff_score *mx;
int i, j, rename_count;
int num_create, num_src, dst_cnt;
@@ -423,8 +541,19 @@ void diffcore_rename(struct diff_options *options)
if (!minimum_score)
minimum_score = DEFAULT_RENAME_SCORE;
+ cacheq.queue = NULL;
+ cacheq.nr = cacheq.alloc = 0;
+
+ if (detect_rename && options->commit)
+ load_rename_cache(q, &cacheq, options);
+
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
+
+ /* was consumed by rename cache */
+ if (!p)
+ continue;
+
if (!DIFF_FILE_VALID(p->one)) {
if (!DIFF_FILE_VALID(p->two))
continue; /* unmerged */
@@ -563,10 +692,17 @@ void diffcore_rename(struct diff_options *options)
*/
outq.queue = NULL;
outq.nr = outq.alloc = 0;
- for (i = 0; i < q->nr; i++) {
+ for (i = j = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
struct diff_filepair *pair_to_free = NULL;
+ if (!p) {
+ if (j >= cacheq.nr)
+ die("Internal error: running out of cacheq.");
+ diff_q(&outq, cacheq.queue[j++]);
+ continue;
+ }
+
if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
/*
* Creation
@@ -635,6 +771,8 @@ void diffcore_rename(struct diff_options *options)
diff_debug_queue("done copying original", &outq);
free(q->queue);
+ if (cacheq.queue)
+ free(cacheq.queue);
*q = outq;
diff_debug_queue("done collapsing", q);
diff --git a/log-tree.c b/log-tree.c
index 5444f08..040e095 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -522,6 +522,7 @@ int log_tree_commit(struct rev_info *opt, struct commit *commit)
log.commit = commit;
log.parent = NULL;
opt->loginfo = &log;
+ opt->diffopt.commit = commit;
shown = log_tree_diff(opt, commit, &log);
if (!shown && opt->loginfo && opt->always_show_header) {
@@ -531,5 +532,6 @@ int log_tree_commit(struct rev_info *opt, struct commit *commit)
}
opt->loginfo = NULL;
maybe_flush_or_die(stdout, "stdout");
+ opt->diffopt.commit = NULL;
return shown;
}
diff --git a/t/t4031-rename-cache.sh b/t/t4031-rename-cache.sh
new file mode 100755
index 0000000..f7c53fd
--- /dev/null
+++ b/t/t4031-rename-cache.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Copyright (c) 2008 Nguyen Thai Ngoc Duy
+#
+
+test_description='Test diff rename cache'
+. ./test-lib.sh
+
+cat >expected <<EOF
+ create mode 100644 sub/c
+ copy a => sub/d (100%)
+EOF
+test_expect_success 'setup' '
+ echo a > a
+ echo b > b
+ mkdir sub
+ echo c > sub/c
+ cp a sub/d
+ A_SHA1=$(git hash-object a)
+ B_SHA1=$(git hash-object b)
+ C_SHA1=$(git hash-object sub/c)
+ D_SHA1=$(git hash-object sub/d)
+ git add a b
+ test_tick
+ git commit -m first
+ git add sub
+ git commit -m second
+ git show --pretty=oneline --summary -C -M --find-copies-harder HEAD|sed 1d > result
+ test_cmp expected result
+'
+
+cat >expected <<EOF
+ copy a => sub/c (100%)
+ copy a => sub/d (100%)
+EOF
+test_expect_success 'load rename pair cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ mkdir -p $(dirname $P)
+ echo $C_SHA1 $A_SHA1 60000 >> $P
+ git show --pretty=oneline --summary -C -M --find-copies-harder HEAD|sed 1d > result
+ test_cmp expected result
+'
+
+cat >expected <<EOF
+ copy a => sub/c (100%)
+ create mode 100644 sub/d
+EOF
+test_expect_success 'load create pair cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ mkdir -p $(dirname $P)
+ echo $D_SHA1 0000000000000000000000000000000000000000 0 >> $P
+ git show --pretty=oneline --summary -C -M --find-copies-harder HEAD|sed 1d > result
+ test_cmp expected result
+'
+
+test_done
--
1.6.0.3.892.g83538
^ permalink raw reply related [flat|nested] 2+ messages in thread* [PATCH v2 2/2] diffcore-rename: add config option to allow to cache renames
2008-11-08 11:27 [PATCH v2 1/2] diffcore-rename: support rename cache Nguyễn Thái Ngọc Duy
@ 2008-11-08 11:27 ` Nguyễn Thái Ngọc Duy
0 siblings, 0 replies; 2+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2008-11-08 11:27 UTC (permalink / raw)
To: git, Junio C Hamano, Yann Dirson; +Cc: Nguyễn Thái Ngọc Duy
If diff.cacherenames is true, then renames will be cached to
$GIT_DIR/rename-cache. By default, it will not overwrite existing
cache. Add --refresh-rename-cache to overwrite.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
Documentation/config.txt | 5 ++++
Documentation/diff-options.txt | 5 ++++
diff.c | 12 +++++++++
diff.h | 2 +
diffcore-rename.c | 49 ++++++++++++++++++++++++++++++++++++++++
t/t4031-rename-cache.sh | 36 +++++++++++++++++++++++++++++
6 files changed, 109 insertions(+), 0 deletions(-)
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 965ed74..8a7f00e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -630,6 +630,11 @@ diff.renames::
will enable basic rename detection. If set to "copies" or
"copy", it will detect copies, as well.
+diff.cacherenames::
+ Tells git to automatically cache renames when detected. The
+ cache resides in $GIT_DIR/rename-cache, which is used by git
+ if it exists.
+
fetch.unpackLimit::
If the number of objects fetched over the git native
transfer is below this
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index c62b45c..d477a40 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -102,6 +102,11 @@ endif::git-format-patch[]
Turn off rename detection, even when the configuration
file gives the default to do so.
+--refresh-rename-cache::
+ By default, when git finds a cached version of a commit, it
+ will not overwrite the cache. This option makes git overwrite
+ old cache or create a new one.
+
--check::
Warn if changes introduce trailing whitespace
or an indent that uses a space before a tab. Exits with
diff --git a/diff.c b/diff.c
index f644947..604cb12 100644
--- a/diff.c
+++ b/diff.c
@@ -26,6 +26,7 @@ int diff_use_color_default = -1;
static const char *external_diff_cmd_cfg;
int diff_auto_refresh_index = 1;
static int diff_mnemonic_prefix;
+static int diff_cache_renames;
static char diff_colors[][COLOR_MAXLEN] = {
"\033[m", /* reset */
@@ -103,6 +104,11 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
return 0;
}
+ if (!strcmp(var, "diff.cacherenames")) {
+ diff_cache_renames = git_config_bool(var, value);
+ return 0;
+ }
+
switch (userdiff_config(var, value)) {
case 0: break;
case -1: return -1;
@@ -2272,6 +2278,8 @@ int diff_setup_done(struct diff_options *options)
if (options->detect_rename && options->rename_limit < 0)
options->rename_limit = diff_rename_limit_default;
+ if (options->detect_rename && diff_cache_renames)
+ DIFF_OPT_SET(options, CACHE_RENAMES);
if (options->setup & DIFF_SETUP_USE_CACHE) {
if (!active_cache)
/* read-cache does not die even when it fails
@@ -2439,6 +2447,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
DIFF_OPT_SET(options, RELATIVE_NAME);
options->prefix = arg + 11;
}
+ else if (!strcmp(arg, "--refresh-rename-cache")) {
+ DIFF_OPT_SET(options, CACHE_RENAMES);
+ DIFF_OPT_SET(options, REFRESH_RENAME_CACHE);
+ }
/* xdiff options */
else if (!strcmp(arg, "-w") || !strcmp(arg, "--ignore-all-space"))
diff --git a/diff.h b/diff.h
index 64a1edd..0503b57 100644
--- a/diff.h
+++ b/diff.h
@@ -66,6 +66,8 @@ typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
#define DIFF_OPT_DIRSTAT_CUMULATIVE (1 << 19)
#define DIFF_OPT_DIRSTAT_BY_FILE (1 << 20)
#define DIFF_OPT_ALLOW_TEXTCONV (1 << 21)
+#define DIFF_OPT_CACHE_RENAMES (1 << 22)
+#define DIFF_OPT_REFRESH_RENAME_CACHE (1 << 23)
#define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag)
#define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag)
#define DIFF_OPT_CLR(opts, flag) ((opts)->flags &= ~DIFF_OPT_##flag)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 598cc8d..49651ea 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -527,6 +527,44 @@ static void load_rename_cache(struct diff_queue_struct *q,
free_hash(&filepair_table);
}
+static void save_rename_cache(struct diff_queue_struct *outq, /* queue whose rename and creation pairs are recorded */
+ struct diff_options *options) /* supplies ->commit and the REFRESH_RENAME_CACHE flag */
+{ /* Write one "<dst sha1> <src sha1> <score>" line per rename/creation pair to the commit's cache file. */
+ int i;
+ FILE *fp = NULL;
+ struct stat st;
+ /* The file is opened lazily, so nothing is created when there is no pair to record. */
+ for (i = 0;i < outq->nr; i++) {
+ struct diff_filepair *dp = outq->queue[i];
+
+ if (!(dp->renamed_pair || /* rename pair */
+ (!DIFF_FILE_VALID(dp->one) && DIFF_FILE_VALID(dp->two)))) /* create pair */
+ continue;
+
+ if (!fp) {
+ char *sha1 = sha1_to_hex(options->commit->object.sha1);
+ char *path = git_path("rename-cache/%c%c/%s", sha1[0], sha1[1], sha1+2);
+
+ /* Already cached. If not force refresh, move on */
+ if (!stat(path, &st) && !DIFF_OPT_TST(options, REFRESH_RENAME_CACHE))
+ return;
+ /* NOTE(review): the directory-creation result is ignored; only a failed fopen() stops the save. */
+ safe_create_leading_directories(path);
+ fp = fopen(path, "w");
+
+ if (!fp)
+ return;
+ }
+ /* A creation is recorded with the all-zero null_sha1 as its source. */
+ fprintf(fp, "%s ", sha1_to_hex(dp->two->sha1));
+ fprintf(fp, "%s %d\n",
+ sha1_to_hex(DIFF_FILE_VALID(dp->one) ? dp->one->sha1 : null_sha1),
+ dp->score);
+ }
+ if (fp)
+ fclose(fp); /* NOTE(review): write errors are not checked */
+}
+
void diffcore_rename(struct diff_options *options)
{
int detect_rename = options->detect_rename;
@@ -770,6 +808,17 @@ void diffcore_rename(struct diff_options *options)
}
diff_debug_queue("done copying original", &outq);
+ /*
+ * Only cache if:
+ * - Have a commit hint
+ * - diff.cacherenames is on
+ * - no pathspec limits
+ */
+ if (options->commit &&
+ DIFF_OPT_TST(options, CACHE_RENAMES) &&
+ !options->nr_paths)
+ save_rename_cache(&outq, options);
+
free(q->queue);
if (cacheq.queue)
free(cacheq.queue);
diff --git a/t/t4031-rename-cache.sh b/t/t4031-rename-cache.sh
index f7c53fd..2d3f993 100755
--- a/t/t4031-rename-cache.sh
+++ b/t/t4031-rename-cache.sh
@@ -53,4 +53,40 @@ test_expect_success 'load create pair cache' '
test_cmp expected result
'
+cat >expected <<EOF
+f2ad6c76f0115a6ba5b00456a849810e7ec0af20 0000000000000000000000000000000000000000 0
+78981922613b2afb6025042ff6bd878ac1994e85 78981922613b2afb6025042ff6bd878ac1994e85 60000
+EOF
+test_expect_success 'save rename cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ rm -r .git/rename-cache
+ git config diff.cacherenames true
+ git show --summary -C -M --find-copies-harder > /dev/null
+ test_cmp expected $P
+'
+
+test_expect_success 'do not save rename cache with limited pathspec' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ echo $P
+ rm $P
+ git config diff.cacherenames true
+ git log --summary -C -M --find-copies-harder HEAD -- sub
+ ! test -f $P
+'
+
+test_expect_success 'subsequent command does not change cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ echo corrupted > $P
+ ! test_cmp expected $P &&
+ git show --summary -C -M --find-copies-harder HEAD > /dev/null &&
+ ! test_cmp expected $P
+'
+
+test_expect_success 'overwrite cache with --refresh-rename-cache' '
+ P=.git/rename-cache/$(git rev-parse HEAD|sed "s,\(..\)\(.*\),\1/\2,")
+ ! test_cmp expected $P &&
+ git show --summary -C -M --find-copies-harder --refresh-rename-cache HEAD > /dev/null &&
+ test_cmp expected $P
+'
+
test_done
--
1.6.0.3.892.g83538
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-11-08 11:30 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-08 11:27 [PATCH v2 1/2] diffcore-rename: support rename cache Nguyễn Thái Ngọc Duy
2008-11-08 11:27 ` [PATCH v2 2/2] diffcore-rename: add config option to allow to cache renames Nguyễn Thái Ngọc Duy
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.