From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Clemens Buchacher <drizzd@aon.at>
Cc: git@vger.kernel.org, gitster@pobox.com
Subject: Re: [PATCH 3/3 v2] use cache for function names in hunk headers
Date: Sun, 26 Sep 2010 18:26:56 +0200 [thread overview]
Message-ID: <4C9F7450.9060208@lsrfire.ath.cx> (raw)
In-Reply-To: <20100923070439.GA29764@localhost>
Am 23.09.2010 09:04, schrieb Clemens Buchacher:
> For each hunk, xdl_find_func searches the preimage
> for a function name until the beginning of the
> file. If the file does not contain any function
> names, this search has complexity O(n2) in the
> number of hunks n.
>
> The timing test in t3419 creates a file with 50000
> lines and a one-line change every 10 lines, i.e.,
> about 5000 hunks. Since none of the lines matches
> a function definition the file is searched 5000
> times.
>
> Instead of searching the entire file for each hunk
> individually, cache and reuse the search result
> from previous hunks.
>
> Diff performance for the test described above
> before and after this optimization:
>
> 2.78user 0.01system 0:02.82elapsed 99%CPU
> 0.05user 0.01system 0:00.06elapsed 96%CPU
>
> Signed-off-by: Clemens Buchacher <drizzd@aon.at>
> ---
>
> I have added the test description as requested by
> Sverre. There are no code changes with respect to
> the previous version of the patch.
>
> The test scenario might seem quite obscure. But I
> have this case in some text files which are not
> edited directly by humans.
>
> Clemens
>
> xdiff/xemit.c | 44 ++++++++++++++++++++++++++++++++------------
> 1 files changed, 32 insertions(+), 12 deletions(-)
>
> diff --git a/xdiff/xemit.c b/xdiff/xemit.c
> index c4bedf0..349bd6b 100644
> --- a/xdiff/xemit.c
> +++ b/xdiff/xemit.c
> @@ -85,8 +85,15 @@ static long def_ff(const char *rec, long len, char *buf, long sz, void *priv)
> return -1;
> }
>
> -static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll,
> - find_func_t ff, void *ff_priv) {
> +struct xdl_find_func_cache {
> + char buf[80];
> + long len;
> + xdfile_t *xf;
> + int line;
> +};
Is xf needed? Does xdl_emit_diff() handle multiple files in one go?
If you inline xdl_find_func() the struct isn't needed anymore.
> +
> +static void xdl_find_func(xdfile_t *xf, long line, find_func_t ff,
> + void *ff_priv, struct xdl_find_func_cache *cache) {
>
> /*
> * Be quite stupid about this for now. Find a line in the old file
> @@ -96,13 +103,28 @@ static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll,
>
> const char *rec;
> long len;
> + int i, l;
>
> - while (i-- > 0) {
> - len = xdl_get_rec(xf, i, &rec);
> - if ((*ll = ff(rec, len, buf, sz, ff_priv)) >= 0)
> + if (line < cache->line)
> + cache->xf = 0;
NULL is preferred.
> +
> + i = line;
> + l = -1;
> + while (--i >= 0 && l < 0) {
> + if (xf == cache->xf && i < cache->line) {
> + cache->line = line;
> return;
> + }
> +
> + len = xdl_get_rec(xf, i, &rec);
> + l = ff(rec, len, cache->buf, sizeof(cache->buf), ff_priv);
> }
> - *ll = 0;
> + if (l < 0)
> + l = 0;
> +
> + cache->xf = xf;
> + cache->len = l;
> + cache->line = line;
> }
>
>
> @@ -125,8 +147,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
> xdemitconf_t const *xecfg) {
> long s1, s2, e1, e2, lctx;
> xdchange_t *xch, *xche;
> - char funcbuf[80];
> - long funclen = 0;
> + struct xdl_find_func_cache func_cache = { "", 0, NULL, -1 };
> find_func_t ff = xecfg->find_func ? xecfg->find_func : def_ff;
>
> if (xecfg->flags & XDL_EMIT_COMMON)
> @@ -150,12 +171,11 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
> */
>
> if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
> - xdl_find_func(&xe->xdf1, s1, funcbuf,
> - sizeof(funcbuf), &funclen,
> - ff, xecfg->find_func_priv);
> + xdl_find_func(&xe->xdf1, s1, ff, xecfg->find_func_priv,
> + &func_cache);
> }
> if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
> - funcbuf, funclen, ecb) < 0)
> + func_cache.buf, func_cache.len, ecb) < 0)
> return -1;
>
> /*
How about something like this? It also removes an outdated comment. The
inlining part should probably split out in its own patch..
xdiff/xemit.c | 38 ++++++++++++++------------------------
1 files changed, 14 insertions(+), 24 deletions(-)
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index c4bedf0..a663f21 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -85,27 +85,6 @@ static long def_ff(const char *rec, long len, char *buf, long sz, void *priv)
return -1;
}
-static void xdl_find_func(xdfile_t *xf, long i, char *buf, long sz, long *ll,
- find_func_t ff, void *ff_priv) {
-
- /*
- * Be quite stupid about this for now. Find a line in the old file
- * before the start of the hunk (and context) which starts with a
- * plausible character.
- */
-
- const char *rec;
- long len;
-
- while (i-- > 0) {
- len = xdl_get_rec(xf, i, &rec);
- if ((*ll = ff(rec, len, buf, sz, ff_priv)) >= 0)
- return;
- }
- *ll = 0;
-}
-
-
static int xdl_emit_common(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
xdemitconf_t const *xecfg) {
xdfile_t *xdf = &xe->xdf1;
@@ -127,6 +106,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
xdchange_t *xch, *xche;
char funcbuf[80];
long funclen = 0;
+ long funclineprev = -1;
find_func_t ff = xecfg->find_func ? xecfg->find_func : def_ff;
if (xecfg->flags & XDL_EMIT_COMMON)
@@ -150,9 +130,19 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
*/
if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
- xdl_find_func(&xe->xdf1, s1, funcbuf,
- sizeof(funcbuf), &funclen,
- ff, xecfg->find_func_priv);
+ long l;
+ for (l = s1 - 1; l >= 0 && l > funclineprev; l--) {
+ const char *rec;
+ long reclen = xdl_get_rec(&xe->xdf1, l, &rec);
+ long newfunclen = ff(rec, reclen, funcbuf,
+ sizeof(funcbuf),
+ xecfg->find_func_priv);
+ if (newfunclen >= 0) {
+ funclen = newfunclen;
+ break;
+ }
+ }
+ funclineprev = s1 - 1;
}
if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
funcbuf, funclen, ecb) < 0)
next prev parent reply other threads:[~2010-09-26 16:27 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-19 9:59 Patch id tests and diff performance optimization Clemens Buchacher
2010-09-19 9:59 ` [PATCH 1/3] add rebase patch id tests Clemens Buchacher
2010-09-19 9:59 ` [PATCH 2/3] do not search functions for patch ID Clemens Buchacher
2010-09-19 9:59 ` [PATCH 3/3] use cache for function names in hunk headers Clemens Buchacher
2010-09-19 20:24 ` Sverre Rabbelier
2010-09-20 17:36 ` Clemens Buchacher
2010-09-20 19:15 ` Sverre Rabbelier
2010-09-23 7:04 ` [PATCH 3/3 v2] " Clemens Buchacher
2010-09-26 16:26 ` René Scharfe [this message]
2010-09-26 20:43 ` Clemens Buchacher
2010-09-26 22:17 ` René Scharfe
2010-09-27 17:52 ` Junio C Hamano
2010-09-30 18:33 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C9F7450.9060208@lsrfire.ath.cx \
--to=rene.scharfe@lsrfire.ath.cx \
--cc=drizzd@aon.at \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).