All of lore.kernel.org
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Brian Downing <bdowning@lavos.net>
Cc: Junio C Hamano <gitster@pobox.com>, git@vger.kernel.org
Subject: Re: [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff
Date: Thu, 04 Sep 2008 00:29:19 +0200	[thread overview]
Message-ID: <48BF0FBF.3010104@lsrfire.ath.cx> (raw)
In-Reply-To: <20080824081254.GI31114@lavos.net>

Brian Downing schrieb:
> On Sat, Aug 23, 2008 at 10:15:59AM +0200, René Scharfe wrote:
>> Could we move more code into the library code to avoid that ugliness?
>>
>> AFAICS, compare_buffer() builds a struct patch with an array of
>> struct chunks, whose members are then fed one by one into either
>> blame_chunk() or handle_split().  Could we avoid the allocation
>> altogether by using a different interface?
> 
> Thanks, I think this is a good idea.  I'll try to work up something like
> this, but it may be a few days before I have any appreciable hacking
> time to do so.

Here's a patch on top of the one I'm replying to, which in turn is
b3779280ca4881252069fa9d1c7d2069a69c4a52 in pu.  While it removes more
code than it adds and has a slightly nicer interface, it doesn't speed
up blame.  The following test case:

   $ git-blame -M -C -C -p --incremental master -- Makefile >/dev/null

loses a few calls to mmap, munmap and brk, as strace tells me, but any
speed up that might result from that is lost in the noise.

I haven't had time to think about how to combine it with the cache you
introduced in patches 2-5, though, and I won't get to it before the
weekend (if at all). :-/

Thanks,
René


 builtin-blame.c   |  178 ++++++++++++++++------------------------------------
 xdiff-interface.c |   50 +++++++++++++++-
 xdiff-interface.h |    5 ++
 3 files changed, 109 insertions(+), 124 deletions(-)

diff --git a/builtin-blame.c b/builtin-blame.c
index 60f70bf..c9783dc 100644
--- a/builtin-blame.c
+++ b/builtin-blame.c
@@ -19,10 +19,6 @@
 #include "string-list.h"
 #include "mailmap.h"
 #include "parse-options.h"
-#include "xdiff/xtypes.h"
-#include "xdiff/xdiffi.h"
-#include "xdiff/xemit.h"
-#include "xdiff/xmacros.h"
 
 static char blame_usage[] = "git blame [options] [rev-opts] [rev] [--] file";
 
@@ -448,99 +444,6 @@ static struct origin *find_rename(struct scoreboard *sb,
 }
 
 /*
- * Parsing of patch chunks...
- */
-struct chunk {
-	/* line number in postimage; up to but not including this
-	 * line is the same as preimage
-	 */
-	int same;
-
-	/* preimage line number after this chunk */
-	int p_next;
-
-	/* postimage line number after this chunk */
-	int t_next;
-};
-
-struct patch {
-	struct chunk *chunks;
-	int num;
-};
-
-static int process_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
-			xdemitconf_t const *xecfg)
-{
-	struct patch *patch = ecb->priv;
-	long s1, s2;
-	xdchange_t *xch, *xche;
-	struct chunk *chunk;
-
-	for (xch = xche = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
-
-		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
-		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
-
-		++patch->num;
-		patch->chunks = xrealloc(patch->chunks,
-					 sizeof(struct chunk) * patch->num);
-		chunk = &patch->chunks[patch->num - 1];
-		chunk->same = s2 + XDL_MAX(xch->i1 - s1, 0);
-		chunk->p_next = xche->i1 + xche->chg1;
-		chunk->t_next = xche->i2 + xche->chg2;
-	}
-
-	return 0;
-}
-
-static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
-				    int context)
-{
-	struct patch *patch;
-	xpparam_t xpp;
-	xdemitconf_t xecfg;
-	xdemitcb_t ecb;
-
-	xpp.flags = xdl_opts;
-	memset(&xecfg, 0, sizeof(xecfg));
-	xecfg.ctxlen = context;
-	patch = xmalloc(sizeof(struct patch));
-	patch->chunks = NULL;
-	patch->num = 0;
-	xecfg.emit_func = (void (*)())process_diff;
-	ecb.priv = patch;
-	xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
-
-	return patch;
-}
-
-/*
- * Run diff between two origins and grab the patch output, so that
- * we can pass blame for lines origin is currently suspected for
- * to its parent.
- */
-static struct patch *get_patch(struct origin *parent, struct origin *origin)
-{
-	mmfile_t file_p, file_o;
-	struct patch *patch;
-
-	fill_origin_blob(parent, &file_p);
-	fill_origin_blob(origin, &file_o);
-	if (!file_p.ptr || !file_o.ptr)
-		return NULL;
-	patch = compare_buffer(&file_p, &file_o, 0);
-	num_get_patch++;
-	return patch;
-}
-
-static void free_patch(struct patch *p)
-{
-	free(p->chunks);
-	free(p);
-}
-
-/*
  * Link in a new blame entry to the scoreboard.  Entries that cover the
  * same line range have been removed from the scoreboard previously.
  */
@@ -786,6 +689,22 @@ static void blame_chunk(struct scoreboard *sb,
 	}
 }
 
+struct blame_chunk_cb_data {
+	struct scoreboard *sb;
+	struct origin *target;
+	struct origin *parent;
+	long plno;
+	long tlno;
+};
+
+static void blame_chunk_cb(void *data, long same, long p_next, long t_next)
+{
+	struct blame_chunk_cb_data *d = data;
+	blame_chunk(d->sb, d->tlno, d->plno, same, d->target, d->parent);
+	d->plno = p_next;
+	d->tlno = t_next;
+}
+
 /*
  * We are looking at the origin 'target' and aiming to pass blame
  * for the lines it is suspected to its parent.  Run diff to find
@@ -795,26 +714,28 @@ static int pass_blame_to_parent(struct scoreboard *sb,
 				struct origin *target,
 				struct origin *parent)
 {
-	int i, last_in_target, plno, tlno;
-	struct patch *patch;
+	int last_in_target;
+	mmfile_t file_p, file_o;
+	struct blame_chunk_cb_data d = { sb, target, parent, 0, 0 };
+	xpparam_t xpp;
+	xdemitconf_t xecfg;
 
 	last_in_target = find_last_in_target(sb, target);
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	patch = get_patch(parent, target);
-	plno = tlno = 0;
-	for (i = 0; i < patch->num; i++) {
-		struct chunk *chunk = &patch->chunks[i];
+	fill_origin_blob(parent, &file_p);
+	fill_origin_blob(target, &file_o);
 
-		blame_chunk(sb, tlno, plno, chunk->same, target, parent);
-		plno = chunk->p_next;
-		tlno = chunk->t_next;
-	}
+	num_get_patch++;
+
+	xpp.flags = xdl_opts;
+	memset(&xecfg, 0, sizeof(xecfg));
+	xecfg.ctxlen = 0;
+	xdi_diff_chunks(&file_p, &file_o, blame_chunk_cb, &d, &xpp, &xecfg);
 	/* The rest (i.e. anything after tlno) are the same as the parent */
-	blame_chunk(sb, tlno, plno, last_in_target, target, parent);
+	blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
 
-	free_patch(patch);
 	return 0;
 }
 
@@ -906,6 +827,23 @@ static void handle_split(struct scoreboard *sb,
 	}
 }
 
+struct handle_split_cb_data {
+	struct scoreboard *sb;
+	struct blame_entry *ent;
+	struct origin *parent;
+	struct blame_entry *split;
+	long plno;
+	long tlno;
+};
+
+static void handle_split_cb(void *data, long same, long p_next, long t_next)
+{
+	struct handle_split_cb_data *d = data;
+	handle_split(d->sb, d->ent, d->tlno, d->plno, same, d->parent, d->split);
+	d->plno = p_next;
+	d->tlno = t_next;
+}
+
 /*
  * Find the lines from parent that are the same as ent so that
  * we can pass blames to it.  file_p has the blob contents for
@@ -920,8 +858,9 @@ static void find_copy_in_blob(struct scoreboard *sb,
 	const char *cp;
 	int cnt;
 	mmfile_t file_o;
-	struct patch *patch;
-	int i, plno, tlno;
+	struct handle_split_cb_data d = { sb, ent, parent, split, 0, 0 };
+	xpparam_t xpp;
+	xdemitconf_t xecfg;
 
 	/*
 	 * Prepare mmfile that contains only the lines in ent.
@@ -936,24 +875,17 @@ static void find_copy_in_blob(struct scoreboard *sb,
 	}
 	file_o.size = cp - file_o.ptr;
 
-	patch = compare_buffer(file_p, &file_o, 1);
-
 	/*
 	 * file_o is a part of final image we are annotating.
 	 * file_p partially may match that image.
 	 */
+	xpp.flags = xdl_opts;
+	memset(&xecfg, 0, sizeof(xecfg));
+	xecfg.ctxlen = 1;
 	memset(split, 0, sizeof(struct blame_entry [3]));
-	plno = tlno = 0;
-	for (i = 0; i < patch->num; i++) {
-		struct chunk *chunk = &patch->chunks[i];
-
-		handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
-		plno = chunk->p_next;
-		tlno = chunk->t_next;
-	}
+	xdi_diff_chunks(file_p, &file_o, handle_split_cb, &d, &xpp, &xecfg);
 	/* remainder, if any, all match the preimage */
-	handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
-	free_patch(patch);
+	handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
 }
 
 /*
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 944ad98..a7cfdab 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -1,6 +1,9 @@
 #include "cache.h"
 #include "xdiff-interface.h"
-#include "strbuf.h"
+#include "xdiff/xtypes.h"
+#include "xdiff/xdiffi.h"
+#include "xdiff/xemit.h"
+#include "xdiff/xmacros.h"
 
 struct xdiff_emit_state {
 	xdiff_emit_consume_fn consume;
@@ -153,6 +156,51 @@ int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
 	return ret;
 }
 
+struct xdiff_emit_chunk_state {
+	xdiff_emit_chunk_consume_fn consume;
+	void *consume_callback_data;
+};
+
+static int process_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
+			xdemitconf_t const *xecfg)
+{
+	long s1, s2, same, p_next, t_next;
+	xdchange_t *xch, *xche;
+	struct xdiff_emit_chunk_state *state = ecb->priv;
+	xdiff_emit_chunk_consume_fn fn = state->consume;
+	void *consume_callback_data = state->consume_callback_data;
+
+
+	for (xch = xche = xscr; xch; xch = xche->next) {
+		xche = xdl_get_hunk(xch, xecfg);
+
+		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
+		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
+		same = s2 + XDL_MAX(xch->i1 - s1, 0);
+		p_next = xche->i1 + xche->chg1;
+		t_next = xche->i2 + xche->chg2;
+
+		fn(consume_callback_data, same, p_next, t_next);
+	}
+	return 0;
+}
+
+int xdi_diff_chunks(mmfile_t *mf1, mmfile_t *mf2,
+		    xdiff_emit_chunk_consume_fn fn, void *consume_callback_data,
+		    xpparam_t const *xpp, xdemitconf_t *xecfg)
+{
+	struct xdiff_emit_chunk_state state;
+	xdemitcb_t ecb;
+
+	memset(&state, 0, sizeof(state));
+	memset(&ecb, 0, sizeof(ecb));
+	state.consume = fn;
+	state.consume_callback_data = consume_callback_data;
+	xecfg->emit_func = (void (*)())process_diff;
+	ecb.priv = &state;
+	return xdi_diff(mf1, mf2, xpp, xecfg, &ecb);
+}
+
 int read_mmfile(mmfile_t *ptr, const char *filename)
 {
 	struct stat st;
diff --git a/xdiff-interface.h b/xdiff-interface.h
index 558492b..1f7d985 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -4,12 +4,17 @@
 #include "xdiff/xdiff.h"
 
 typedef void (*xdiff_emit_consume_fn)(void *, char *, unsigned long);
+typedef void (*xdiff_emit_chunk_consume_fn)(void *, long, long, long);
 
 int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *ecb);
 int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
 		  xdiff_emit_consume_fn fn, void *consume_callback_data,
 		  xpparam_t const *xpp,
 		  xdemitconf_t const *xecfg, xdemitcb_t *xecb);
+int xdi_diff_chunks(mmfile_t *mf1, mmfile_t *mf2,
+		    xdiff_emit_chunk_consume_fn fn, void *consume_callback_data,
+		    xpparam_t const *xpp, xdemitconf_t *xecfg);
+
 int parse_hunk_header(char *line, int len,
 		      int *ob, int *on,
 		      int *nb, int *nn);

  reply	other threads:[~2008-09-03 22:30 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-21 23:21 [PATCH 0/5] More git blame speed improvements Brian Downing
2008-08-21 23:21 ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff Brian Downing
2008-08-21 23:21   ` [PATCH 2/5] Bypass textual patch generation and parsing in git blame Brian Downing
2008-08-21 23:21     ` [PATCH 3/5] Always initialize xpparam_t to 0 Brian Downing
2008-08-21 23:22       ` [PATCH 4/5] Allow xdiff machinery to cache hash results for a file Brian Downing
2008-08-21 23:22         ` [PATCH 5/5] Use xdiff caching to improve git blame performance Brian Downing
2008-08-23  8:15   ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-08-23  9:03     ` Junio C Hamano
2008-08-24  8:12     ` Brian Downing
2008-09-03 22:29       ` René Scharfe [this message]
2008-10-25 13:30         ` [PATCH 1/5] blame: inline get_patch() René Scharfe
2008-10-25 13:30         ` [PATCH 2/5] Always initialize xpparam_t to 0 René Scharfe
2008-10-25 13:30         ` [PATCH 3/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-10-25 13:31         ` [PATCH 4/5] add xdi_diff_hunks() for callers that only need hunk lengths René Scharfe
2008-10-25 13:31         ` [PATCH 5/5] blame: use xdi_diff_hunks(), get rid of struct patch René Scharfe
2008-10-25 19:36           ` Junio C Hamano
2008-10-26 22:20             ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48BF0FBF.3010104@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=bdowning@lavos.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.