git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Brian Downing <bdowning@lavos.net>
Cc: Junio C Hamano <gitster@pobox.com>, git@vger.kernel.org
Subject: Re: [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff
Date: Thu, 04 Sep 2008 00:29:19 +0200	[thread overview]
Message-ID: <48BF0FBF.3010104@lsrfire.ath.cx> (raw)
In-Reply-To: <20080824081254.GI31114@lavos.net>

Brian Downing schrieb:
> On Sat, Aug 23, 2008 at 10:15:59AM +0200, René Scharfe wrote:
>> Could we move more code into the library code to avoid that ugliness?
>>
>> AFAICS, compare_buffer() builds a struct patch with an array of
>> struct chunks, whose members are then fed one by one into either
>> blame_chunk() or handle_split().  Could we avoid the allocation
>> altogether by using a different interface?
> 
> Thanks, I think this is a good idea.  I'll try to work up something like
> this, but it may be a few days before I have any appreciable hacking
> time to do so.

Here's a patch on top of the one I'm replying to, which in turn is
b3779280ca4881252069fa9d1c7d2069a69c4a52 in pu.  While it removes more
code than it adds and has a slightly nicer interface, it doesn't speed
up blame.  The following test case:

   $ git-blame -M -C -C -p --incremental master -- Makefile >/dev/null

loses a few calls to mmap, munmap and brk, as strace tells me, but any
speed up that might result from that is lost in the noise.

I haven't had time to think about how to combine it with the cache you
introduced in patches 2-5, though, and I won't get to it before the
weekend (if at all). :-/

Thanks,
René


 builtin-blame.c   |  178 ++++++++++++++++------------------------------------
 xdiff-interface.c |   50 +++++++++++++++-
 xdiff-interface.h |    5 ++
 3 files changed, 109 insertions(+), 124 deletions(-)

diff --git a/builtin-blame.c b/builtin-blame.c
index 60f70bf..c9783dc 100644
--- a/builtin-blame.c
+++ b/builtin-blame.c
@@ -19,10 +19,6 @@
 #include "string-list.h"
 #include "mailmap.h"
 #include "parse-options.h"
-#include "xdiff/xtypes.h"
-#include "xdiff/xdiffi.h"
-#include "xdiff/xemit.h"
-#include "xdiff/xmacros.h"
 
 static char blame_usage[] = "git blame [options] [rev-opts] [rev] [--] file";
 
@@ -448,99 +444,6 @@ static struct origin *find_rename(struct scoreboard *sb,
 }
 
 /*
- * Parsing of patch chunks...
- */
-struct chunk {
-	/* line number in postimage; up to but not including this
-	 * line is the same as preimage
-	 */
-	int same;
-
-	/* preimage line number after this chunk */
-	int p_next;
-
-	/* postimage line number after this chunk */
-	int t_next;
-};
-
-struct patch {
-	struct chunk *chunks;
-	int num;
-};
-
-static int process_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
-			xdemitconf_t const *xecfg)
-{
-	struct patch *patch = ecb->priv;
-	long s1, s2;
-	xdchange_t *xch, *xche;
-	struct chunk *chunk;
-
-	for (xch = xche = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
-
-		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
-		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
-
-		++patch->num;
-		patch->chunks = xrealloc(patch->chunks,
-					 sizeof(struct chunk) * patch->num);
-		chunk = &patch->chunks[patch->num - 1];
-		chunk->same = s2 + XDL_MAX(xch->i1 - s1, 0);
-		chunk->p_next = xche->i1 + xche->chg1;
-		chunk->t_next = xche->i2 + xche->chg2;
-	}
-
-	return 0;
-}
-
-static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
-				    int context)
-{
-	struct patch *patch;
-	xpparam_t xpp;
-	xdemitconf_t xecfg;
-	xdemitcb_t ecb;
-
-	xpp.flags = xdl_opts;
-	memset(&xecfg, 0, sizeof(xecfg));
-	xecfg.ctxlen = context;
-	patch = xmalloc(sizeof(struct patch));
-	patch->chunks = NULL;
-	patch->num = 0;
-	xecfg.emit_func = (void (*)())process_diff;
-	ecb.priv = patch;
-	xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
-
-	return patch;
-}
-
-/*
- * Run diff between two origins and grab the patch output, so that
- * we can pass blame for lines origin is currently suspected for
- * to its parent.
- */
-static struct patch *get_patch(struct origin *parent, struct origin *origin)
-{
-	mmfile_t file_p, file_o;
-	struct patch *patch;
-
-	fill_origin_blob(parent, &file_p);
-	fill_origin_blob(origin, &file_o);
-	if (!file_p.ptr || !file_o.ptr)
-		return NULL;
-	patch = compare_buffer(&file_p, &file_o, 0);
-	num_get_patch++;
-	return patch;
-}
-
-static void free_patch(struct patch *p)
-{
-	free(p->chunks);
-	free(p);
-}
-
-/*
  * Link in a new blame entry to the scoreboard.  Entries that cover the
  * same line range have been removed from the scoreboard previously.
  */
@@ -786,6 +689,22 @@ static void blame_chunk(struct scoreboard *sb,
 	}
 }
 
+struct blame_chunk_cb_data {
+	struct scoreboard *sb;
+	struct origin *target;
+	struct origin *parent;
+	long plno;
+	long tlno;
+};
+
+static void blame_chunk_cb(void *data, long same, long p_next, long t_next)
+{
+	struct blame_chunk_cb_data *d = data;
+	blame_chunk(d->sb, d->tlno, d->plno, same, d->target, d->parent);
+	d->plno = p_next;
+	d->tlno = t_next;
+}
+
 /*
  * We are looking at the origin 'target' and aiming to pass blame
  * for the lines it is suspected to its parent.  Run diff to find
@@ -795,26 +714,28 @@ static int pass_blame_to_parent(struct scoreboard *sb,
 				struct origin *target,
 				struct origin *parent)
 {
-	int i, last_in_target, plno, tlno;
-	struct patch *patch;
+	int last_in_target;
+	mmfile_t file_p, file_o;
+	struct blame_chunk_cb_data d = { sb, target, parent, 0, 0 };
+	xpparam_t xpp;
+	xdemitconf_t xecfg;
 
 	last_in_target = find_last_in_target(sb, target);
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	patch = get_patch(parent, target);
-	plno = tlno = 0;
-	for (i = 0; i < patch->num; i++) {
-		struct chunk *chunk = &patch->chunks[i];
+	fill_origin_blob(parent, &file_p);
+	fill_origin_blob(target, &file_o);
 
-		blame_chunk(sb, tlno, plno, chunk->same, target, parent);
-		plno = chunk->p_next;
-		tlno = chunk->t_next;
-	}
+	num_get_patch++;
+
+	xpp.flags = xdl_opts;
+	memset(&xecfg, 0, sizeof(xecfg));
+	xecfg.ctxlen = 0;
+	xdi_diff_chunks(&file_p, &file_o, blame_chunk_cb, &d, &xpp, &xecfg);
 	/* The rest (i.e. anything after tlno) are the same as the parent */
-	blame_chunk(sb, tlno, plno, last_in_target, target, parent);
+	blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
 
-	free_patch(patch);
 	return 0;
 }
 
@@ -906,6 +827,23 @@ static void handle_split(struct scoreboard *sb,
 	}
 }
 
+struct handle_split_cb_data {
+	struct scoreboard *sb;
+	struct blame_entry *ent;
+	struct origin *parent;
+	struct blame_entry *split;
+	long plno;
+	long tlno;
+};
+
+static void handle_split_cb(void *data, long same, long p_next, long t_next)
+{
+	struct handle_split_cb_data *d = data;
+	handle_split(d->sb, d->ent, d->tlno, d->plno, same, d->parent, d->split);
+	d->plno = p_next;
+	d->tlno = t_next;
+}
+
 /*
  * Find the lines from parent that are the same as ent so that
  * we can pass blames to it.  file_p has the blob contents for
@@ -920,8 +858,9 @@ static void find_copy_in_blob(struct scoreboard *sb,
 	const char *cp;
 	int cnt;
 	mmfile_t file_o;
-	struct patch *patch;
-	int i, plno, tlno;
+	struct handle_split_cb_data d = { sb, ent, parent, split, 0, 0 };
+	xpparam_t xpp;
+	xdemitconf_t xecfg;
 
 	/*
 	 * Prepare mmfile that contains only the lines in ent.
@@ -936,24 +875,17 @@ static void find_copy_in_blob(struct scoreboard *sb,
 	}
 	file_o.size = cp - file_o.ptr;
 
-	patch = compare_buffer(file_p, &file_o, 1);
-
 	/*
 	 * file_o is a part of final image we are annotating.
 	 * file_p partially may match that image.
 	 */
+	xpp.flags = xdl_opts;
+	memset(&xecfg, 0, sizeof(xecfg));
+	xecfg.ctxlen = 1;
 	memset(split, 0, sizeof(struct blame_entry [3]));
-	plno = tlno = 0;
-	for (i = 0; i < patch->num; i++) {
-		struct chunk *chunk = &patch->chunks[i];
-
-		handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
-		plno = chunk->p_next;
-		tlno = chunk->t_next;
-	}
+	xdi_diff_chunks(file_p, &file_o, handle_split_cb, &d, &xpp, &xecfg);
 	/* remainder, if any, all match the preimage */
-	handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
-	free_patch(patch);
+	handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
 }
 
 /*
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 944ad98..a7cfdab 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -1,6 +1,9 @@
 #include "cache.h"
 #include "xdiff-interface.h"
-#include "strbuf.h"
+#include "xdiff/xtypes.h"
+#include "xdiff/xdiffi.h"
+#include "xdiff/xemit.h"
+#include "xdiff/xmacros.h"
 
 struct xdiff_emit_state {
 	xdiff_emit_consume_fn consume;
@@ -153,6 +156,51 @@ int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
 	return ret;
 }
 
+struct xdiff_emit_chunk_state {
+	xdiff_emit_chunk_consume_fn consume;
+	void *consume_callback_data;
+};
+
+static int process_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
+			xdemitconf_t const *xecfg)
+{
+	long s1, s2, same, p_next, t_next;
+	xdchange_t *xch, *xche;
+	struct xdiff_emit_chunk_state *state = ecb->priv;
+	xdiff_emit_chunk_consume_fn fn = state->consume;
+	void *consume_callback_data = state->consume_callback_data;
+
+
+	for (xch = xche = xscr; xch; xch = xche->next) {
+		xche = xdl_get_hunk(xch, xecfg);
+
+		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
+		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
+		same = s2 + XDL_MAX(xch->i1 - s1, 0);
+		p_next = xche->i1 + xche->chg1;
+		t_next = xche->i2 + xche->chg2;
+
+		fn(consume_callback_data, same, p_next, t_next);
+	}
+	return 0;
+}
+
+int xdi_diff_chunks(mmfile_t *mf1, mmfile_t *mf2,
+		    xdiff_emit_chunk_consume_fn fn, void *consume_callback_data,
+		    xpparam_t const *xpp, xdemitconf_t *xecfg)
+{
+	struct xdiff_emit_chunk_state state;
+	xdemitcb_t ecb;
+
+	memset(&state, 0, sizeof(state));
+	memset(&ecb, 0, sizeof(ecb));
+	state.consume = fn;
+	state.consume_callback_data = consume_callback_data;
+	xecfg->emit_func = (void (*)())process_diff;
+	ecb.priv = &state;
+	return xdi_diff(mf1, mf2, xpp, xecfg, &ecb);
+}
+
 int read_mmfile(mmfile_t *ptr, const char *filename)
 {
 	struct stat st;
diff --git a/xdiff-interface.h b/xdiff-interface.h
index 558492b..1f7d985 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -4,12 +4,17 @@
 #include "xdiff/xdiff.h"
 
 typedef void (*xdiff_emit_consume_fn)(void *, char *, unsigned long);
+typedef void (*xdiff_emit_chunk_consume_fn)(void *, long, long, long);
 
 int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *ecb);
 int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
 		  xdiff_emit_consume_fn fn, void *consume_callback_data,
 		  xpparam_t const *xpp,
 		  xdemitconf_t const *xecfg, xdemitcb_t *xecb);
+int xdi_diff_chunks(mmfile_t *mf1, mmfile_t *mf2,
+		    xdiff_emit_chunk_consume_fn fn, void *consume_callback_data,
+		    xpparam_t const *xpp, xdemitconf_t *xecfg);
+
 int parse_hunk_header(char *line, int len,
 		      int *ob, int *on,
 		      int *nb, int *nn);

  reply	other threads:[~2008-09-03 22:30 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-21 23:21 [PATCH 0/5] More git blame speed improvements Brian Downing
2008-08-21 23:21 ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff Brian Downing
2008-08-21 23:21   ` [PATCH 2/5] Bypass textual patch generation and parsing in git blame Brian Downing
2008-08-21 23:21     ` [PATCH 3/5] Always initialize xpparam_t to 0 Brian Downing
2008-08-21 23:22       ` [PATCH 4/5] Allow xdiff machinery to cache hash results for a file Brian Downing
2008-08-21 23:22         ` [PATCH 5/5] Use xdiff caching to improve git blame performance Brian Downing
2008-08-23  8:15   ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-08-23  9:03     ` Junio C Hamano
2008-08-24  8:12     ` Brian Downing
2008-09-03 22:29       ` René Scharfe [this message]
2008-10-25 13:30         ` [PATCH 1/5] blame: inline get_patch() René Scharfe
2008-10-25 13:30         ` [PATCH 2/5] Always initialize xpparam_t to 0 René Scharfe
2008-10-25 13:30         ` [PATCH 3/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-10-25 13:31         ` [PATCH 4/5] add xdi_diff_hunks() for callers that only need hunk lengths René Scharfe
2008-10-25 13:31         ` [PATCH 5/5] blame: use xdi_diff_hunks(), get rid of struct patch René Scharfe
2008-10-25 19:36           ` Junio C Hamano
2008-10-26 22:20             ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48BF0FBF.3010104@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=bdowning@lavos.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).