From: Jeff King <peff@peff.net>
To: John Keeping <john@keeping.me.uk>
Cc: git@vger.kernel.org, Kevin Bracey <kevin@bracey.fi>,
Junio C Hamano <gitster@pobox.com>
Subject: Re: [RFC/PATCH] patch-ids: check modified paths before calculating diff
Date: Wed, 29 May 2013 03:50:20 -0400 [thread overview]
Message-ID: <20130529075020.GC11955@sigill.intra.peff.net> (raw)
In-Reply-To: <20130529072225.GB11955@sigill.intra.peff.net>
On Wed, May 29, 2013 at 03:22:25AM -0400, Jeff King wrote:
> revs=origin/master...origin/jk/submodule-subdirectory-ok
> stock | you | me
> -------------------------------
> real 0m0.501s | 0m0.078s | 0m0.098s
> user 0m0.480s | 0m0.056s | 0m0.084s
> sys 0m0.016s | 0m0.020s | 0m0.012s
>
> revs=origin/next...origin/pu
> stock | you | me
> -------------------------------
> real 0m0.857s | 0m0.847s | 0m0.519s
> user 0m0.828s | 0m0.812s | 0m0.488s
> sys 0m0.024s | 0m0.028s | 0m0.024s
>
> So it performs slightly less well on the small case, and a bit better on
> the large case. I think part of the problem is that when we do have a
> "loose" hit, we end up re-doing the tree diff to find the strict, which
> involves re-inflating the trees. It's unavoidable for the lazy entries
> unless we want to cache the whole diff_queued_diff, but for the non-lazy
> entries we should be able to do the strict patch-id incrementally. We
> just need to improve the function interfaces.
The (somewhat hacky) patch below, on top of my previous one, does that
optimization. But the timings aren't improved by much. My best-of-five
for the second case went down to:
real 0m0.495s
user 0m0.488s
sys 0m0.004s
However, the actual time to just do "git log --raw $revs" is:
real 0m0.333s
user 0m0.292s
sys 0m0.032s
which provides a lower-bound for how well we can do, as it is just doing
a single tree diff for each commit. So I think we have reaped most of
the benefits of this approach already (and we will generally have to do
_some_ true patch-id calculations, so we can never meet that lower
bound).
---
diff.c | 11 +++-------
diff.h | 3 ++-
patch-ids.c | 39 ++++++++++++++++++++++++++---------
3 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/diff.c b/diff.c
index 3b55788..161c5bf 100644
--- a/diff.c
+++ b/diff.c
@@ -4233,8 +4233,8 @@ static void patch_id_consume(void *priv, char *line, unsigned long len)
}
/* returns 0 upon success, and writes result into sha1 */
-static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1,
- int loose)
+int diff_get_patch_id(struct diff_options *options, unsigned char *sha1,
+ int loose)
{
struct diff_queue_struct *q = &diff_queued_diff;
int i;
@@ -4330,21 +4330,16 @@ int diff_flush_patch_id(struct diff_options *options,
return 0;
}
-int diff_flush_patch_id(struct diff_options *options,
- unsigned char *sha1,
- int loose)
+void diff_queue_clear(void)
{
struct diff_queue_struct *q = &diff_queued_diff;
int i;
- int result = diff_get_patch_id(options, sha1, loose);
for (i = 0; i < q->nr; i++)
diff_free_filepair(q->queue[i]);
free(q->queue);
DIFF_QUEUE_CLEAR(q);
-
- return result;
}
static int is_summary_empty(const struct diff_queue_struct *q)
diff --git a/diff.h b/diff.h
index b018aaf..7207d4b 100644
--- a/diff.h
+++ b/diff.h
@@ -320,7 +320,8 @@ extern int do_diff_cache(const unsigned char *, struct diff_options *);
extern int run_diff_index(struct rev_info *revs, int cached);
extern int do_diff_cache(const unsigned char *, struct diff_options *);
-extern int diff_flush_patch_id(struct diff_options *, unsigned char *, int loose);
+extern int diff_get_patch_id(struct diff_options *, unsigned char *, int loose);
+extern void diff_queue_clear(void);
extern int diff_result_code(struct diff_options *, int);
diff --git a/patch-ids.c b/patch-ids.c
index 3a83ee6..83cda92 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -4,8 +4,7 @@
#include "sha1-lookup.h"
#include "patch-ids.h"
-static int commit_patch_id(struct commit *commit, struct diff_options *options,
- unsigned char *sha1, int loose)
+static void start_patch_id(struct commit *commit, struct diff_options *options)
{
if (commit->parents)
diff_tree_sha1(commit->parents->item->object.sha1,
@@ -13,7 +12,6 @@ static int commit_patch_id(struct commit *commit, struct diff_options *options,
else
diff_root_tree_sha1(commit->object.sha1, "", options);
diffcore_std(options);
- return diff_flush_patch_id(options, sha1, loose);
}
int init_patch_ids(struct patch_ids *ids)
@@ -50,12 +48,16 @@ static struct patch_id *add_commit(struct commit *commit,
int no_add)
{
struct patch_id *p;
- unsigned char loose[20], strict[20];
+ unsigned char loose[20], strict[20] = {0};
unsigned long hash;
void **pos;
- if (commit_patch_id(commit, &ids->diffopts, loose, 1))
+ start_patch_id(commit, &ids->diffopts);
+ if (diff_get_patch_id(&ids->diffopts, loose, 1)) {
+ diff_queue_clear();
return NULL;
+ }
+
hash = hash_sha1(loose);
p = lookup_hash(hash, &ids->table);
@@ -66,15 +68,24 @@ static struct patch_id *add_commit(struct commit *commit,
/*
* We have a real loose match; lazily load and cache the strict
- * id to compare.
+ * id to compare. We must do the "current" commit first, as its
+ * incremental state is waiting in the diff machinery.
*/
+ if (is_null_sha1(strict)) {
+ int result = diff_get_patch_id(&ids->diffopts, strict, 0);
+ diff_queue_clear();
+ if (result)
+ return NULL;
+ }
+
if (is_null_sha1(p->strict)) {
- if (commit_patch_id(p->commit, &ids->diffopts,
- p->strict, 0))
+ int result;
+ start_patch_id(p->commit, &ids->diffopts);
+ result = diff_get_patch_id(&ids->diffopts, p->strict, 0);
+ diff_queue_clear();
+ if (result)
return NULL;
}
- if (commit_patch_id(commit, &ids->diffopts, strict, 0))
- return NULL;
/*
* If the strict ones match, we do not need to look farther;
@@ -85,6 +96,14 @@ static struct patch_id *add_commit(struct commit *commit,
}
/*
+ * If we get here and have not filled in strict, then we do not need
+ * to compute it (for now), but we must clean up the leftover diff
+ * state.
+ */
+ if (is_null_sha1(strict))
+ diff_queue_clear();
+
+ /*
* Otherwise, we may have a loose but not strict match, or even no
* loose match at all. Now we can add the new entry (or return failure
* to look it up).
next prev parent reply other threads:[~2013-05-29 7:50 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-19 13:17 [RFC/PATCH] patch-ids: check modified paths before calculating diff John Keeping
2013-05-19 19:36 ` Jonathan Nieder
2013-05-19 22:02 ` John Keeping
2013-05-20 6:36 ` Jonathan Nieder
2013-05-20 8:16 ` John Keeping
2013-05-20 4:46 ` Junio C Hamano
2013-05-29 6:20 ` Jeff King
2013-05-29 7:22 ` Jeff King
2013-05-29 7:50 ` Jeff King [this message]
2013-05-29 18:08 ` Junio C Hamano
2013-05-29 18:36 ` Jeff King
2013-05-29 18:48 ` Junio C Hamano
2013-05-29 21:30 ` John Keeping
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130529075020.GC11955@sigill.intra.peff.net \
--to=peff@peff.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=john@keeping.me.uk \
--cc=kevin@bracey.fi \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).