From: Bo Yang <struggleyb.nku@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com
Subject: [PATCH v5.1 08/17] map/take range to the parent of commits
Date: Thu, 12 Aug 2010 21:09:58 +0800 [thread overview]
Message-ID: <1281618598-6721-1-git-send-email-struggleyb.nku@gmail.com> (raw)
When going from a commit to its parents, we map the "interesting"
range of lines according to the change made.
For non-merge commit, we just run map_range on the ranges, which
works as follows:
1. Run diffcore_std to find out the pre/postimage for each file.
2. Run xdi_diff_hunks on each interesting set of pre/postimages.
3. The map_range_cb callback is invoked for each hunk by the diff
engine, and we use it to calculate the pre-image range from the
post-image range in the function map_lines.
For merge commits, we run map_range once for every parent.
Simultaneously we use a take_range pass to eliminate all ranges
that are identical. If any ranges remain after that, then the
merge is considered non-trivial.
The algorithm that maps lines from post-image to pre-image is in
the function map_lines. Generally, we use simple line number
calculation method to do the map.
Signed-off-by: Bo Yang <struggleyb.nku@gmail.com>
---
line.c | 502 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
revision.h | 5 +-
2 files changed, 506 insertions(+), 1 deletions(-)
diff --git a/line.c b/line.c
index 1b77172..1aa828b 100644
--- a/line.c
+++ b/line.c
@@ -504,3 +504,505 @@ void setup_line(struct rev_info *rev, struct diff_line_range *r)
diff_tree_release_paths(opt);
}
+struct take_range_cb_data {
+ struct diff_line_range *interesting; /* currently interesting ranges */
+ struct diff_line_range *range;
+ /* the ranges corresponds to the interesting ranges of parent commit */
+ long plno, tlno;
+ /* the last line number of diff hunk */
+ int diff;
+ /* whether there is some line changes between the current
+ * commit and its parent */
+};
+
+#define SCALE_FACTOR 4
+/*
+ * [p_start, p_end] represents the pre-image of current diff hunk,
+ * [t_start, t_end] represents the post-image of the current diff hunk,
+ * [start, end] represents the currently interesting line range in
+ * post-image,
+ * [o_start, o_end] represents the original line range that coresponds
+ * to current line range.
+ */
+void map_lines(long p_start, long p_end, long t_start, long t_end,
+ long start, long end, long *o_start, long *o_end)
+{
+ /*
+ * Normally, p_start should be less than p_end, so does the
+ * t_start and t_end. But when the line range is added from
+ * scratch, p_start will be greater than p_end. When the line
+ * range is deleted, t_start will be greater than t_end.
+ */
+ if (p_start > p_end) {
+ *o_start = *o_end = 0;
+ return;
+ }
+ /* A deletion */
+ if (t_start > t_end) {
+ *o_start = p_start;
+ *o_end = p_end;
+ return;
+ }
+
+ if (start == t_start && end == t_end) {
+ *o_start = p_start;
+ *o_end = p_end;
+ return;
+ }
+
+ /*
+ * A heuristic for lines mapping:
+ *
+ * When the pre-image is no more than 1/SCALE_FACTOR of the post-image,
+ * there is no effective way to find out which part of pre-image
+ * corresponds to the currently interesting range of post-image.
+ * And we are in the danger of tracking totally useless lines.
+ * So, we just treat all the post-image lines as added from scratch.
+ */
+ if (SCALE_FACTOR * (p_end - p_start + 1) < (t_end - t_start + 1)) {
+ *o_start = *o_end = 0;
+ return;
+ }
+
+ *o_start = p_start + start - t_start;
+ *o_end = p_end - (t_end - end);
+
+ if (*o_start > *o_end) {
+ int temp = *o_start;
+ *o_start = *o_end;
+ *o_end = temp;
+ }
+
+ if (*o_start < p_start)
+ *o_start = p_start;
+ if (*o_end > p_end)
+ *o_end = p_end;
+}
+
+/*
+ * When same == 1:
+ * [p_start, p_end] represents the diff hunk line range of pre-image,
+ * [t_start, t_end] represents the diff hunk line range of post-image.
+ * When same == 0, they represent a range of identical lines between
+ * two images.
+ *
+ * This function find out the corresponding line ranges of currently
+ * interesting ranges which this diff hunk touches.
+ */
+static void map_range(struct take_range_cb_data *data, int same,
+ long p_start, long p_end, long t_start, long t_end)
+{
+ struct line_range *ranges = data->interesting->ranges;
+ long takens, takene, start, end;
+ int i = 0, out = 0, added = 0;
+ long op_start = p_start, op_end = p_end, ot_start = t_start, ot_end = t_end;
+
+ for (; i < data->interesting->nr; i++) {
+ added = 0;
+ if (t_start > ranges[i].end)
+ continue;
+ if (t_end < ranges[i].start)
+ break;
+
+ if (t_start > ranges[i].start) {
+ start = t_start;
+ takens = p_start;
+ if (t_end >= ranges[i].end) {
+ end = ranges[i].end;
+ takene = p_start + end - t_start;
+ } else {
+ end = t_end;
+ takene = p_end;
+ out = 1;
+ }
+ } else {
+ start = ranges[i].start;
+ takens = p_start + start - t_start;
+ if (t_end >= ranges[i].end) {
+ end = ranges[i].end;
+ takene = p_start + end - t_start;
+ } else {
+ end = t_end;
+ takene = p_end;
+ out = 1;
+ }
+ }
+
+ if (!same) {
+ struct print_pair *pair = &ranges[i].pair;
+ struct print_range *rr = NULL;
+ PRINT_PAIR_GROW(pair);
+ rr = pair->ranges + pair->nr - 1;
+ PRINT_RANGE_INIT(rr);
+ rr->start = start;
+ rr->end = end;
+ map_lines(op_start, op_end, ot_start, ot_end, start, end,
+ &takens, &takene);
+ if (takens == 0 && takene == 0) {
+ added = 1;
+ rr->line_added = 1;
+ }
+ rr->pstart = takens;
+ rr->pend = takene;
+ data->diff = 1;
+ data->interesting->diff = 1;
+ ranges[i].diff = 1;
+ }
+ if (added) {
+ /* Code movement/copy detect here, now place two dummy statements here */
+ int dummy = 0;
+ dummy = 1;
+ } else {
+ struct line_range *added_range = diff_line_range_insert(data->range,
+ NULL, takens, takene);
+ assert(added_range);
+ ranges[i].pstart = added_range->start;
+ ranges[i].pend = added_range->end;
+ }
+
+ t_start = end + 1;
+ p_start = takene + 1;
+
+ if (out)
+ break;
+ }
+}
+
+/*
+ * [p_start, p_end] represents the line range of pre-image,
+ * [t_start, t_end] represents the line range of post-image,
+ * and they are identical lines.
+ *
+ * This function substracts out the identical lines between current
+ * commit and its parent, from currently interesting ranges.
+ */
+static void take_range(struct take_range_cb_data *data,
+ long p_start, long p_end, long t_start, long t_end)
+{
+ struct line_range *ranges = data->interesting->ranges;
+ long takens, takene, start, end;
+ int i = 0, out = 0, added = 0;
+
+ for (; i < data->interesting->nr; i++) {
+ added = 0;
+ if (t_start > ranges[i].end)
+ continue;
+ if (t_end < ranges[i].start)
+ break;
+
+ if (t_start > ranges[i].start) {
+ long tmp = ranges[i].end;
+ ranges[i].end = t_start - 1;
+ start = t_start;
+ takens = p_start;
+ if (t_end >= tmp) {
+ end = tmp;
+ takene = p_start + end - t_start;
+ p_start = takene + 1;
+ t_start = end + 1;
+ } else {
+ end = t_end;
+ takene = p_end;
+ diff_line_range_insert(data->interesting, NULL,
+ t_end + 1, tmp);
+ out = 1;
+ }
+ } else {
+ start = ranges[i].start;
+ takens = p_start + start - t_start;
+ if (t_end >= ranges[i].end) {
+ int num = data->interesting->nr - 1;
+ end = ranges[i].end;
+ takene = p_start + end - t_start;
+ t_start = end + 1;
+ p_start = takene + 1;
+ memmove(ranges + i, ranges + i + 1, (num - i) * sizeof(*ranges));
+ data->interesting->nr = num;
+ i--;
+ } else {
+ end = t_end;
+ takene = p_end;
+ ranges[i].start = t_end + 1;
+ out = 1;
+ }
+ }
+
+ diff_line_range_insert(data->range, NULL, takens, takene);
+
+ if (out)
+ break;
+ }
+}
+
+static void take_range_cb(void *data, long same, long p_next, long t_next)
+{
+ struct take_range_cb_data *d = data;
+ long p_start = d->plno + 1, t_start = d->tlno + 1;
+ long p_end = p_start + same - t_start, t_end = same;
+
+ /* If one file is added from scratch, we should not bother to call
+ * take_range, since there is nothing to take
+ */
+ if (t_end >= t_start)
+ take_range(d, p_start, p_end, t_start, t_end);
+ d->plno = p_next;
+ d->tlno = t_next;
+}
+
+static void map_range_cb(void *data, long same, long p_next, long t_next)
+{
+ struct take_range_cb_data *d = data;
+
+ long p_start = d->plno + 1;
+ long t_start = d->tlno + 1;
+ long p_end = same - t_start + p_start;
+ long t_end = same;
+
+ /* Firstly, take the unchanged lines from child */
+ if (t_end >= t_start)
+ map_range(d, 1, p_start, p_end, t_start, t_end);
+
+ /* find out which lines to print */
+ t_start = same + 1;
+ p_start = d->plno + t_start - d->tlno;
+ map_range(d, 0, p_start, p_next, t_start, t_next);
+
+ d->plno = p_next;
+ d->tlno = t_next;
+}
+
+/*
+ * We support two kinds of operation in this function:
+ * 1. map == 0, take the same lines from the current commit and assign it
+ * to parent;
+ * 2. map == 1, in addition to the same lines, we also map the changed lines
+ * from the current commit to the parent according to the
+ * diff output.
+ * take_range_cb and take_range are used to take same lines from current commit
+ * to parents.
+ * map_range_cb and map_range are used to map line ranges to the parent.
+ */
+static void assign_range_to_parent(struct rev_info *rev, struct commit *c,
+ struct commit *p, struct diff_line_range *r,
+ struct diff_options *opt, int map)
+{
+ struct diff_line_range *rr = xmalloc(sizeof(*rr));
+ struct diff_line_range *cr = rr, *prev_r = rr;
+ struct diff_line_range *rg = NULL;
+ struct tree_desc desc1, desc2;
+ void *tree1 = NULL, *tree2 = NULL;
+ unsigned long size1, size2;
+ struct diff_queue_struct *queue;
+ struct take_range_cb_data cb = {NULL, cr, 0, 0};
+ xpparam_t xpp;
+ xdemitconf_t xecfg;
+ int i, diff = 0;
+ xdiff_emit_hunk_consume_fn fn = map ? map_range_cb : take_range_cb;
+
+ DIFF_LINE_RANGE_INIT(cr);
+ memset(&xpp, 0, sizeof(xpp));
+ memset(&xecfg, 0, sizeof(xecfg));
+ xecfg.ctxlen = xecfg.interhunkctxlen = 0;
+
+ /*
+ * Compose up two trees, for root commit, we make up a empty tree.
+ */
+ assert(c);
+ tree2 = read_object_with_reference(c->tree->object.sha1, "tree",
+ &size2, NULL);
+ if (tree2 == NULL)
+ die("Unable to read tree (%s)", sha1_to_hex(c->tree->object.sha1));
+ init_tree_desc(&desc2, tree2, size2);
+ if (p) {
+ tree1 = read_object_with_reference(p->tree->object.sha1,
+ "tree", &size1, NULL);
+ if (tree1 == NULL)
+ die("Unable to read tree (%s)",
+ sha1_to_hex(p->tree->object.sha1));
+ init_tree_desc(&desc1, tree1, size1);
+ } else {
+ init_tree_desc(&desc1, "", 0);
+ }
+
+ DIFF_QUEUE_CLEAR(&diff_queued_diff);
+ diff_tree(&desc1, &desc2, "", opt);
+ diffcore_std(opt);
+
+ queue = &diff_queued_diff;
+ for (i = 0; i < queue->nr; i++) {
+ struct diff_filepair *pair = queue->queue[i];
+ struct diff_line_range *rg = r;
+ mmfile_t file_p, file_t;
+ assert(pair->two->path);
+ while (rg) {
+ assert(rg->spec->path);
+ if (!strcmp(rg->spec->path, pair->two->path))
+ break;
+ rg = rg->next;
+ }
+
+ if (rg == NULL)
+ continue;
+ rg->touch = 1;
+ if (rg->nr == 0)
+ continue;
+
+ rg->status = pair->status;
+ assert(pair->two->sha1_valid);
+ diff_populate_filespec(pair->two, 0);
+ file_t.ptr = pair->two->data;
+ file_t.size = pair->two->size;
+
+ if (rg->prev)
+ free_filespec(rg->prev);
+ rg->prev = pair->one;
+ rg->prev->count++;
+ if (pair->one->sha1_valid) {
+ diff_populate_filespec(pair->one, 0);
+ file_p.ptr = pair->one->data;
+ file_p.size = pair->one->size;
+ } else {
+ file_p.ptr = "";
+ file_p.size = 0;
+ }
+
+ if (cr->nr != 0) {
+ struct diff_line_range *tmp = xmalloc(sizeof(*tmp));
+ cr->next = tmp;
+ prev_r = cr;
+ cr = tmp;
+ } else if (cr->spec)
+ DIFF_LINE_RANGE_CLEAR(cr);
+
+ DIFF_LINE_RANGE_INIT(cr);
+ if (pair->one->sha1_valid) {
+ cr->spec = pair->one;
+ cr->spec->count++;
+ }
+
+ cb.interesting = rg;
+ cb.range = cr;
+ cb.diff = 0;
+ cb.plno = cb.tlno = 0;
+ xdi_diff_hunks(&file_p, &file_t, fn, &cb, &xpp, &xecfg);
+ if (cb.diff)
+ diff = 1;
+ /*
+ * The remain part is the same part.
+ * Instead of calculating the true line number of the two files,
+ * use the biggest integer.
+ */
+ if (map)
+ map_range(&cb, 1, cb.plno + 1, INT_MAX, cb.tlno + 1, INT_MAX);
+ else
+ take_range(&cb, cb.plno + 1, INT_MAX, cb.tlno + 1, INT_MAX);
+ }
+ opt->output_format = DIFF_FORMAT_NO_OUTPUT;
+ diff_flush(opt);
+
+ /*
+ * Collect the untouch ranges, this comes from the files not changed
+ * between two commit.
+ */
+ rg = r;
+ while (rg) {
+ /* clear the touch one to make it usable in next round */
+ if (rg->touch) {
+ rg->touch = 0;
+ } else {
+ struct diff_line_range *untouch = diff_line_range_clone(rg);
+ if (prev_r == rr && rr->nr == 0) {
+ rr = prev_r = untouch;
+ } else {
+ prev_r->next = untouch;
+ prev_r = untouch;
+ }
+ }
+ rg = rg->next;
+ }
+
+ if (cr->nr == 0) {
+ DIFF_LINE_RANGE_CLEAR(cr);
+ free(cr);
+ if (prev_r == cr)
+ rr = NULL;
+ else
+ prev_r->next = NULL;
+ }
+
+ if (rr) {
+ assert(p);
+ add_line_range(rev, p, rr);
+ }
+
+ /* and the ranges of current commit c is updated */
+ c->object.flags &= ~RANGE_UPDATE;
+ if (diff)
+ c->object.flags |= NEED_PRINT;
+
+ if (tree1)
+ free(tree1);
+ if (tree2)
+ free(tree2);
+}
+
+static void diff_update_parent_range(struct rev_info *rev,
+ struct commit *commit)
+{
+ struct diff_line_range *r = lookup_line_range(rev, commit);
+ struct commit_list *parents = commit->parents;
+ struct commit *c = NULL;
+ if (parents) {
+ assert(parents->next == NULL);
+ c = parents->item;
+ }
+
+ assign_range_to_parent(rev, commit, c, r, &rev->diffopt, 1);
+}
+
+static void assign_parents_range(struct rev_info *rev, struct commit *commit)
+{
+ struct commit_list *parents = commit->parents;
+ struct diff_line_range *r = lookup_line_range(rev, commit);
+ struct diff_line_range *evil = NULL, *range = NULL;
+ int nontrivial = 0;
+
+ /*
+ * If we are in linear history, update range and flush the patch if
+ * necessary
+ */
+ if (parents == NULL || parents->next == NULL)
+ return diff_update_parent_range(rev, commit);
+
+ /*
+ * Loop on the parents and assign the ranges to different
+ * parents, if there is any range left, this commit must
+ * be an evil merge.
+ */
+ evil = diff_line_range_clone_deeply(r);
+ parents = commit->parents;
+ while (parents) {
+ struct commit *p = parents->item;
+ assign_range_to_parent(rev, commit, p, r, &rev->diffopt, 1);
+ assign_range_to_parent(rev, commit, p, evil, &rev->diffopt, 0);
+ parents = parents->next;
+ }
+
+ /*
+ * yes, this must be an evil merge.
+ */
+ range = evil;
+ while (range) {
+ if (range->nr) {
+ commit->object.flags |= NEED_PRINT | EVIL_MERGE;
+ nontrivial = 1;
+ }
+ range = range->next;
+ }
+
+ if (nontrivial)
+ add_decoration(&rev->nontrivial_merge, &commit->object, evil);
+ else
+ cleanup(evil);
+}
+
diff --git a/revision.h b/revision.h
index c0d5065..2627ec4 100644
--- a/revision.h
+++ b/revision.h
@@ -15,7 +15,9 @@
#define ADDED (1u<<7) /* Parents already parsed and added? */
#define SYMMETRIC_LEFT (1u<<8)
#define RANGE_UPDATE (1u<<9) /* for line level traverse */
-#define ALL_REV_FLAGS ((1u<<10)-1)
+#define NEED_PRINT (1u<<10)
+#define EVIL_MERGE (1u<<11)
+#define ALL_REV_FLAGS ((1u<<12)-1)
#define DECORATE_SHORT_REFS 1
#define DECORATE_FULL_REFS 2
@@ -141,6 +143,7 @@ struct rev_info {
int count_right;
/* line level range that we are chasing */
struct decoration line_range;
+ struct decoration nontrivial_merge;
};
#define REV_TREE_SAME 0
--
1.7.0.2.273.gc2413.dirty
reply other threads:[~2010-08-12 13:10 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1281618598-6721-1-git-send-email-struggleyb.nku@gmail.com \
--to=struggleyb.nku@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).