From: Nguyen Thai Ngoc Duy <pclouds@gmail.com>
To: git@vger.kernel.org
Subject: [WIP PATCH] Manual rename correction
Date: Tue, 31 Jul 2012 21:15:36 +0700 [thread overview]
Message-ID: <20120731141536.GA26283@do> (raw)
Git's rename detection is good but still not perfect. There have been
a few times I wanted to correct git for better output but I
couldn't. This PoC WIP patch attempts to address that. It allows
breaking/rearranging any file pairs. We can do something crazy like this:
attr.c => dir.c | 1786 ++++++++++++++++++++++++++++++++-----------------
dir.c => attr.c | 1788 +++++++++++++++++---------------------------------
t/t1306-xdg-files.sh | 39 ++
t/test-lib.sh | 1 +
4 files changed, 1828 insertions(+), 1786 deletions(-)
The above output was produced with "git diff --manual-rename=foo A B"
and "foo" contains (probably not in the best format though)
-- 8< --
attr.c dir.c
dir.c attr.c
-- 8< --
The plan is to use git-notes to record rename corrections like above
so that "git log --patch" for example can make use of them. I'm not
sure what to do with merge commits yet (can we track renames in a
merge?). We can generate a rename file from "diff -CM", then users can
edit and save it.
If you want to diff between two arbitrary trees, you'll have to feed
rename corrections via command line as git-notes are for commit diff
only.
In some cases, manual rename may be cheaper than --find-copies-harder,
so this feature could help reduce cpu usage. Though that's not my main
aim.
Oh and I think rename detection in diff other than tree-tree does not
work. Maybe I tested it the wrong way?
Comments?
-- 8< --
diff --git a/diff.c b/diff.c
index 62cbe14..c8d55d2 100644
--- a/diff.c
+++ b/diff.c
@@ -3547,6 +3547,12 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
DIFF_OPT_SET(options, RENAME_EMPTY);
else if (!strcmp(arg, "--no-rename-empty"))
DIFF_OPT_CLR(options, RENAME_EMPTY);
+ else if (!prefixcmp(arg, "--manual-rename=")) {
+ int ret = strbuf_read_file(&options->renames, arg + 16, 0);
+ if (ret == -1)
+ die("unable to read %s", arg + 16);
+ DIFF_OPT_SET(options, MANUAL_RENAME);
+ }
else if (!strcmp(arg, "--relative"))
DIFF_OPT_SET(options, RELATIVE_NAME);
else if (!prefixcmp(arg, "--relative=")) {
@@ -4621,6 +4627,8 @@ void diffcore_std(struct diff_options *options)
if (options->skip_stat_unmatch)
diffcore_skip_stat_unmatch(options);
if (!options->found_follow) {
+ if (DIFF_OPT_TST(options, MANUAL_RENAME))
+ diffcore_manual_rename(options);
/* See try_to_follow_renames() in tree-diff.c */
if (options->break_opt != -1)
diffcore_break(options->break_opt);
diff --git a/diff.h b/diff.h
index e027650..60d104e 100644
--- a/diff.h
+++ b/diff.h
@@ -61,6 +61,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
#define DIFF_OPT_FIND_COPIES_HARDER (1 << 6)
#define DIFF_OPT_FOLLOW_RENAMES (1 << 7)
#define DIFF_OPT_RENAME_EMPTY (1 << 8)
+#define DIFF_OPT_MANUAL_RENAME (1 << 9)
/* (1 << 9) unused */
#define DIFF_OPT_HAS_CHANGES (1 << 10)
#define DIFF_OPT_QUICK (1 << 11)
@@ -147,6 +148,7 @@ struct diff_options {
int close_file;
struct pathspec pathspec;
+ struct strbuf renames;
change_fn_t change;
add_remove_fn_t add_remove;
diff_format_fn_t format_callback;
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 216a7a4..05da99f 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -722,3 +722,148 @@ void diffcore_rename(struct diff_options *options)
rename_src_nr = rename_src_alloc = 0;
return;
}
+
+struct rename { /* one parsed "old new" instruction from options->renames */
+ char *one, *two; /* old and new path; one stays NULL when the line begins with the separator */
+ struct rename *next_one, *next_two; /* links to the entry previously stored in the slot insert_hash() returned */
+ struct diff_filespec *spec_one; /* queued filespec whose path matched one, set during the queue scan */
+ struct diff_filespec *spec_two; /* queued filespec whose path matched two, set during the queue scan */
+};
+
+static unsigned int string_hash(const char *s) /* returns 1 plus the sum of the bytes of s */
+{
+ unsigned int v = 1;
+ while (s && *s) /* a NULL s skips the loop, so NULL and "" both hash to 1 */
+ v += (unsigned char)*s++;
+ return v;
+}
+
+void diffcore_manual_rename(struct diff_options *options) /* rebuilds diff_queued_diff according to options->renames */
+{
+ struct rename *renames = NULL;
+ int i, nr = 0, alloc = 0;
+ const char *next, *p, *end;
+ struct hash_table hash_one, hash_two;
+ struct diff_queue_struct *q = &diff_queued_diff;
+ struct diff_queue_struct outq;
+
+ /* parse rename instructions: one "old new" pair per line of options->renames */
+ end = options->renames.buf + options->renames.len;
+ for (p = options->renames.buf; p < end; p = next) {
+ struct rename *r;
+ const char *sep, *nl, *next_sep;
+
+ nl = strchr(p, '\n');
+ if (!nl)
+ nl = next = end; /* final line without a trailing newline */
+ else {
+ next = nl + 1;
+ if (p == nl)
+ continue; /* empty line: nothing to parse */
+ }
+
+ /* one space to separate two paths (for now, quoting can come later) */
+ sep = strchr(p, ' ');
+ if (!sep || sep >= nl)
+ die("invalid syntax");
+ next_sep = strchr(sep + 1, ' ');
+ if (next_sep && next_sep < nl)
+ die("invalid syntax"); /* a second space on the same line is rejected */
+
+ ALLOC_GROW(renames, nr + 1, alloc);
+ r = renames + nr++;
+ memset(r, 0, sizeof(*r));
+ if (p < sep) /* false when the line starts with the space, leaving r->one NULL */
+ r->one = xstrndup(p, sep - p);
+ if (sep < nl) /* NOTE(review): always true here, since sep >= nl already died above */
+ r->two = xstrndup(sep + 1, nl - (sep + 1));
+ }
+
+
+ /* initialize hash tables: hash_one is keyed by old path, hash_two by new path */
+ init_hash(&hash_one);
+ init_hash(&hash_two);
+ for (i = 0; i < nr; i++) {
+ struct rename *r = renames + i;
+ void** p;
+ p = insert_hash(string_hash(r->one), r, &hash_one);
+ if (p) { /* presumably non-NULL means the hash was already present -- TODO confirm against hash.h */
+ r->next_one = *p;
+ *p = r;
+ }
+ p = insert_hash(string_hash(r->two), r, &hash_two);
+ if (p) { /* same chaining as above, for the new-path table */
+ r->next_two = *p;
+ *p = r;
+ }
+ }
+
+ /* rename: scan the queue, remembering the filespecs named by an instruction */
+ DIFF_QUEUE_CLEAR(&outq);
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ struct rename *r1 = NULL, *r2 = NULL;
+ int hash, skip = 0;
+ if (DIFF_PAIR_UNMERGED(p))
+ continue; /* NOTE(review): unmerged pairs are not re-queued into outq, so they vanish from the output */
+ if (DIFF_FILE_VALID(p->one)) {
+ hash = string_hash(p->one->path);
+ r1 = lookup_hash(hash, &hash_one);
+ while (r1) {
+ if (!strcmp(p->one->path, r1->one)) { /* NOTE(review): r1->one can be NULL for a line that began with a space */
+ r1->spec_one = p->one;
+ skip = 1;
+ }
+ r1 = r1->next_one;
+ }
+ }
+ if (DIFF_FILE_VALID(p->two)) {
+ hash = string_hash(p->two->path);
+ r2 = lookup_hash(hash, &hash_two);
+ while (r2) {
+ if (!strcmp(p->two->path, r2->two)) {
+ r2->spec_two = p->two;
+ skip = 1;
+ }
+ r2 = r2->next_two;
+ }
+ }
+
+ /* This pair has nothing to do with manual renames,
+ reinsert it */
+ if (!skip) /* NOTE(review): pairs with skip set are neither re-queued nor freed here */
+ diff_q(&outq, p);
+ }
+ free(q->queue);
+
+ for (i = 0; i < nr; i++) { /* queue one new pair per rename instruction */
+ struct rename *r = renames + i;
+ struct diff_filepair *dp;
+ if (r->spec_one && r->spec_two) {
+ dp = diff_queue(&outq, r->spec_one, r->spec_two);
+ dp->renamed_pair = 1;
+ dp->score = MAX_SCORE;
+ } else if (r->spec_one && !r->two) { /* NOTE(review): r->two is never NULL after parsing, so this looks unreachable */
+ dp = diff_queue(&outq, r->spec_one,
+ alloc_filespec(r->one));
+ } else if (!r->one && r->spec_two) { /* instruction with an empty old path */
+ dp = diff_queue(&outq, alloc_filespec(r->two),
+ r->spec_two);
+ } else {
+ die("incorrect rename %s %s", r->one, r->two); /* NOTE(review): r->one may be NULL when passed to %s */
+ }
+ }
+ *q = outq;
+ /* required? */
+ diffcore_fix_diff_index(options);
+
+ /* cleanup: release the parsed instructions and both hash tables */
+ for (i = 0; i < nr; i++) {
+ struct rename *r = renames + i;
+ free(r->one);
+ free(r->two);
+ }
+ free(renames);
+ free_hash(&hash_one);
+ free_hash(&hash_two);
+}
diff --git a/diffcore.h b/diffcore.h
index be0739c..193bc67 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -107,6 +107,7 @@ extern void diff_q(struct diff_queue_struct *, struct diff_filepair *);
extern void diffcore_break(int);
extern void diffcore_rename(struct diff_options *);
+extern void diffcore_manual_rename(struct diff_options *);
extern void diffcore_merge_broken(void);
extern void diffcore_pickaxe(struct diff_options *);
extern void diffcore_order(const char *orderfile);
-- 8< --
--
Duy
next reply other threads:[~2012-07-31 14:16 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-07-31 14:15 Nguyen Thai Ngoc Duy [this message]
2012-07-31 16:32 ` [WIP PATCH] Manual rename correction Junio C Hamano
2012-07-31 19:23 ` Jeff King
2012-07-31 20:20 ` Junio C Hamano
2012-08-01 0:42 ` Jeff King
2012-08-01 6:01 ` Junio C Hamano
2012-08-01 21:54 ` Jeff King
2012-08-01 22:10 ` Junio C Hamano
2012-08-02 22:37 ` Jeff King
2012-08-02 22:51 ` Junio C Hamano
2012-08-02 22:58 ` Jeff King
2012-08-02 5:33 ` Junio C Hamano
2012-08-01 1:10 ` Nguyen Thai Ngoc Duy
2012-08-01 2:01 ` Jeff King
2012-08-01 4:36 ` Nguyen Thai Ngoc Duy
2012-08-01 6:09 ` Junio C Hamano
2012-08-01 6:34 ` Nguyen Thai Ngoc Duy
2012-08-01 21:32 ` Jeff King
2012-08-01 21:27 ` Jeff King
2012-08-02 12:08 ` Nguyen Thai Ngoc Duy
2012-08-02 22:41 ` Jeff King
2012-08-04 17:09 ` [PATCH 0/8] caching rename results Jeff King
2012-08-04 17:10 ` [PATCH 1/8] implement generic key/value map Jeff King
2012-08-04 22:58 ` Junio C Hamano
2012-08-06 20:35 ` Jeff King
2012-08-04 17:10 ` [PATCH 2/8] map: add helper functions for objects as keys Jeff King
2012-08-04 17:11 ` [PATCH 3/8] fast-export: use object to uint32 map instead of "decorate" Jeff King
2012-08-04 17:11 ` [PATCH 4/8] decorate: use "map" for the underlying implementation Jeff King
2012-08-04 17:11 ` [PATCH 5/8] map: implement persistent maps Jeff King
2012-08-04 17:11 ` [PATCH 6/8] implement metadata cache subsystem Jeff King
2012-08-04 22:49 ` Junio C Hamano
2012-08-06 20:31 ` Jeff King
2012-08-06 20:38 ` Jeff King
2012-08-04 17:12 ` [PATCH 7/8] implement rename cache Jeff King
2012-08-04 17:14 ` [PATCH 8/8] diff: optionally use " Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120731141536.GA26283@do \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.