git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Brian Downing <bdowning@lavos.net>
Cc: Junio C Hamano <gitster@pobox.com>, git@vger.kernel.org
Subject: [PATCH 5/5] blame: use xdi_diff_hunks(), get rid of struct patch
Date: Sat, 25 Oct 2008 15:31:36 +0200	[thread overview]
Message-ID: <49031FB8.8060003@lsrfire.ath.cx> (raw)
In-Reply-To: <48BF0FBF.3010104@lsrfire.ath.cx>

Based on a patch by Brian Downing, this replaces the struct patch based
code for blame passing with calls to xdi_diff_hunks().  This way we
avoid generating and then parsing patches; we only let the interesting
infos be passed to our callbacks instead.  This makes blame a bit faster:

   $ blame="./git blame -M -C -C -p --incremental v1.6.0"

   # master
   $ /usr/bin/time $blame Makefile >/dev/null
   1.38user 0.14system 0:01.52elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
   0inputs+0outputs (0major+12226minor)pagefaults 0swaps
   $ /usr/bin/time $blame cache.h >/dev/null
   1.66user 0.13system 0:01.80elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
   0inputs+0outputs (0major+12262minor)pagefaults 0swaps

   # this patch series
   $ /usr/bin/time $blame Makefile >/dev/null
   1.27user 0.12system 0:01.40elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
   0inputs+0outputs (0major+11836minor)pagefaults 0swaps
   $ /usr/bin/time $blame cache.h >/dev/null
   1.52user 0.12system 0:01.70elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k
   0inputs+0outputs (0major+12052minor)pagefaults 0swaps

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
Brian, your numbers looked much more impressive.  Could you please clock
this code with your repository and the file server.c?  I wonder if this
callback mechanism is just too complicated or if your case simply benefits
lots more than the two files from git mentioned above.

The patch series ends here without adding xdiff caching, for two reasons.
It's quite easy to add it; patch 4 from your series applies unchanged and
patch 5 is just needs a few small changes to account for the absence of
compare_buffer().  More importantly, speed actually went down with caching
for the test case.  The common tail optimization (xdi_diff() vs. xdl_diff())
seems to beat caching for cache.h and Makefile..

 builtin-blame.c |  191 +++++++++++++++----------------------------------------
 1 files changed, 52 insertions(+), 139 deletions(-)

diff --git a/builtin-blame.c b/builtin-blame.c
index 5ca7065..b6bc5cf 100644
--- a/builtin-blame.c
+++ b/builtin-blame.c
@@ -443,113 +443,6 @@ static struct origin *find_rename(struct scoreboard *sb,
 }
 
 /*
- * Parsing of patch chunks...
- */
-struct chunk {
-	/* line number in postimage; up to but not including this
-	 * line is the same as preimage
-	 */
-	int same;
-
-	/* preimage line number after this chunk */
-	int p_next;
-
-	/* postimage line number after this chunk */
-	int t_next;
-};
-
-struct patch {
-	struct chunk *chunks;
-	int num;
-};
-
-struct blame_diff_state {
-	struct patch *ret;
-	unsigned hunk_post_context;
-	unsigned hunk_in_pre_context : 1;
-};
-
-static void process_u_diff(void *state_, char *line, unsigned long len)
-{
-	struct blame_diff_state *state = state_;
-	struct chunk *chunk;
-	int off1, off2, len1, len2, num;
-
-	num = state->ret->num;
-	if (len < 4 || line[0] != '@' || line[1] != '@') {
-		if (state->hunk_in_pre_context && line[0] == ' ')
-			state->ret->chunks[num - 1].same++;
-		else {
-			state->hunk_in_pre_context = 0;
-			if (line[0] == ' ')
-				state->hunk_post_context++;
-			else
-				state->hunk_post_context = 0;
-		}
-		return;
-	}
-
-	if (num && state->hunk_post_context) {
-		chunk = &state->ret->chunks[num - 1];
-		chunk->p_next -= state->hunk_post_context;
-		chunk->t_next -= state->hunk_post_context;
-	}
-	state->ret->num = ++num;
-	state->ret->chunks = xrealloc(state->ret->chunks,
-				      sizeof(struct chunk) * num);
-	chunk = &state->ret->chunks[num - 1];
-	if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
-		state->ret->num--;
-		return;
-	}
-
-	/* Line numbers in patch output are one based. */
-	off1--;
-	off2--;
-
-	chunk->same = len2 ? off2 : (off2 + 1);
-
-	chunk->p_next = off1 + (len1 ? len1 : 1);
-	chunk->t_next = chunk->same + len2;
-	state->hunk_in_pre_context = 1;
-	state->hunk_post_context = 0;
-}
-
-static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
-				    int context)
-{
-	struct blame_diff_state state;
-	xpparam_t xpp;
-	xdemitconf_t xecfg;
-	xdemitcb_t ecb;
-
-	memset(&xpp, 0, sizeof(xpp));
-	xpp.flags = xdl_opts;
-	memset(&xecfg, 0, sizeof(xecfg));
-	xecfg.ctxlen = context;
-	memset(&state, 0, sizeof(state));
-	state.ret = xmalloc(sizeof(struct patch));
-	state.ret->chunks = NULL;
-	state.ret->num = 0;
-
-	xdi_diff_outf(file_p, file_o, process_u_diff, &state, &xpp, &xecfg, &ecb);
-
-	if (state.ret->num) {
-		struct chunk *chunk;
-		chunk = &state.ret->chunks[state.ret->num - 1];
-		chunk->p_next -= state.hunk_post_context;
-		chunk->t_next -= state.hunk_post_context;
-	}
-	return state.ret;
-}
-
-static void free_patch(struct patch *p)
-{
-	free(p->chunks);
-	free(p);
-}
-
-/*
  * Link in a new blame entry to the scoreboard.  Entries that cover the
  * same line range have been removed from the scoreboard previously.
  */
@@ -795,6 +688,22 @@ static void blame_chunk(struct scoreboard *sb,
 	}
 }
 
+struct blame_chunk_cb_data {
+	struct scoreboard *sb;
+	struct origin *target;
+	struct origin *parent;
+	long plno;
+	long tlno;
+};
+
+static void blame_chunk_cb(void *data, long same, long p_next, long t_next)
+{
+	struct blame_chunk_cb_data *d = data;
+	blame_chunk(d->sb, d->tlno, d->plno, same, d->target, d->parent);
+	d->plno = p_next;
+	d->tlno = t_next;
+}
+
 /*
  * We are looking at the origin 'target' and aiming to pass blame
  * for the lines it is suspected to its parent.  Run diff to find
@@ -804,36 +713,28 @@ static int pass_blame_to_parent(struct scoreboard *sb,
 				struct origin *target,
 				struct origin *parent)
 {
-	int i, last_in_target, plno, tlno;
-	struct patch *patch;
+	int last_in_target;
 	mmfile_t file_p, file_o;
+	struct blame_chunk_cb_data d = { sb, target, parent, 0, 0 };
+	xpparam_t xpp;
+	xdemitconf_t xecfg;
 
 	last_in_target = find_last_in_target(sb, target);
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	/*
-	 * Run diff between two origins and grab the patch output, so that
-	 * we can pass blame for lines origin is currently suspected for
-	 * to its parent.
-	 */
 	fill_origin_blob(parent, &file_p);
 	fill_origin_blob(target, &file_o);
-	patch = compare_buffer(&file_p, &file_o, 0);
 	num_get_patch++;
 
-	plno = tlno = 0;
-	for (i = 0; i < patch->num; i++) {
-		struct chunk *chunk = &patch->chunks[i];
-
-		blame_chunk(sb, tlno, plno, chunk->same, target, parent);
-		plno = chunk->p_next;
-		tlno = chunk->t_next;
-	}
+	memset(&xpp, 0, sizeof(xpp));
+	xpp.flags = xdl_opts;
+	memset(&xecfg, 0, sizeof(xecfg));
+	xecfg.ctxlen = 0;
+	xdi_diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, &xpp, &xecfg);
 	/* The rest (i.e. anything after tlno) are the same as the parent */
-	blame_chunk(sb, tlno, plno, last_in_target, target, parent);
+	blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
 
-	free_patch(patch);
 	return 0;
 }
 
@@ -925,6 +826,23 @@ static void handle_split(struct scoreboard *sb,
 	}
 }
 
+struct handle_split_cb_data {
+	struct scoreboard *sb;
+	struct blame_entry *ent;
+	struct origin *parent;
+	struct blame_entry *split;
+	long plno;
+	long tlno;
+};
+
+static void handle_split_cb(void *data, long same, long p_next, long t_next)
+{
+	struct handle_split_cb_data *d = data;
+	handle_split(d->sb, d->ent, d->tlno, d->plno, same, d->parent, d->split);
+	d->plno = p_next;
+	d->tlno = t_next;
+}
+
 /*
  * Find the lines from parent that are the same as ent so that
  * we can pass blames to it.  file_p has the blob contents for
@@ -939,8 +857,9 @@ static void find_copy_in_blob(struct scoreboard *sb,
 	const char *cp;
 	int cnt;
 	mmfile_t file_o;
-	struct patch *patch;
-	int i, plno, tlno;
+	struct handle_split_cb_data d = { sb, ent, parent, split, 0, 0 };
+	xpparam_t xpp;
+	xdemitconf_t xecfg;
 
 	/*
 	 * Prepare mmfile that contains only the lines in ent.
@@ -955,24 +874,18 @@ static void find_copy_in_blob(struct scoreboard *sb,
 	}
 	file_o.size = cp - file_o.ptr;
 
-	patch = compare_buffer(file_p, &file_o, 1);
-
 	/*
 	 * file_o is a part of final image we are annotating.
 	 * file_p partially may match that image.
 	 */
+	memset(&xpp, 0, sizeof(xpp));
+	xpp.flags = xdl_opts;
+	memset(&xecfg, 0, sizeof(xecfg));
+	xecfg.ctxlen = 1;
 	memset(split, 0, sizeof(struct blame_entry [3]));
-	plno = tlno = 0;
-	for (i = 0; i < patch->num; i++) {
-		struct chunk *chunk = &patch->chunks[i];
-
-		handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
-		plno = chunk->p_next;
-		tlno = chunk->t_next;
-	}
+	xdi_diff_hunks(file_p, &file_o, handle_split_cb, &d, &xpp, &xecfg);
 	/* remainder, if any, all match the preimage */
-	handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
-	free_patch(patch);
+	handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
 }
 
 /*
-- 
1.6.0.3.514.g2f91b

  parent reply	other threads:[~2008-10-25 13:32 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-21 23:21 [PATCH 0/5] More git blame speed improvements Brian Downing
2008-08-21 23:21 ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff Brian Downing
2008-08-21 23:21   ` [PATCH 2/5] Bypass textual patch generation and parsing in git blame Brian Downing
2008-08-21 23:21     ` [PATCH 3/5] Always initialize xpparam_t to 0 Brian Downing
2008-08-21 23:22       ` [PATCH 4/5] Allow xdiff machinery to cache hash results for a file Brian Downing
2008-08-21 23:22         ` [PATCH 5/5] Use xdiff caching to improve git blame performance Brian Downing
2008-08-23  8:15   ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-08-23  9:03     ` Junio C Hamano
2008-08-24  8:12     ` Brian Downing
2008-09-03 22:29       ` René Scharfe
2008-10-25 13:30         ` [PATCH 1/5] blame: inline get_patch() René Scharfe
2008-10-25 13:30         ` [PATCH 2/5] Always initialize xpparam_t to 0 René Scharfe
2008-10-25 13:30         ` [PATCH 3/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-10-25 13:31         ` [PATCH 4/5] add xdi_diff_hunks() for callers that only need hunk lengths René Scharfe
2008-10-25 13:31         ` René Scharfe [this message]
2008-10-25 19:36           ` [PATCH 5/5] blame: use xdi_diff_hunks(), get rid of struct patch Junio C Hamano
2008-10-26 22:20             ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49031FB8.8060003@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=bdowning@lavos.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).