git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Brandon Casey <casey@nrlssc.navy.mil>,
	Johannes Schindelin <johannes.schindelin@gmx.de>,
	Git Mailing List <git@vger.kernel.org>
Subject: Re: "git reflog expire --all" very slow
Date: Mon, 30 Mar 2009 22:24:05 -0700	[thread overview]
Message-ID: <7vy6um6z9m.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <alpine.LFD.2.00.0903302154000.4093@localhost.localdomain> (Linus Torvalds's message of "Mon, 30 Mar 2009 22:09:25 -0700 (PDT)")

Linus Torvalds <torvalds@linux-foundation.org> writes:

> On Mon, 30 Mar 2009, Junio C Hamano wrote:
>> 
>> Most of your reflog entries are expected to be reachable from the tip, so
>> one optimization would be to mark all commits reachable from the tip
>> upfront, and omit the in_merge_bases() computation for the ones that are
>> already marked.  Perhaps something like this...
>
> This if anything makes things just go slower.
>
> Not much, but some. It went from 36.566s to 38.070s. That may be in the 
> noise, I've not done any sensitivity analysis.

I actually think that the cutoff for history traversal is bogus.  You may
have a 30-day old reflog entry that pulled in a 45-day old commit, and
giving up the smudging at the expiry cutoff timestamp does not make much
sense.

I do have a lot of reflog entries kept around, as my main repository has
these:

        [gc]
                reflogexpire = '2005-01-01 00:00:00 +0000'
                reflogexpireunreachable = '2005-01-01 00:00:00 +0000'

so let me experiment a bit more.

 builtin-reflog.c |   61 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/builtin-reflog.c b/builtin-reflog.c
index d95f515..ef02b5a 100644
--- a/builtin-reflog.c
+++ b/builtin-reflog.c
@@ -36,6 +36,7 @@ struct expire_reflog_cb {
 	FILE *newlog;
 	const char *ref;
 	struct commit *ref_commit;
+	int commit_marked;
 	struct cmd_reflog_expire_cb *cmd;
 	unsigned char last_kept_sha1[20];
 };
@@ -52,6 +53,7 @@ struct collect_reflog_cb {
 
 #define INCOMPLETE	(1u<<10)
 #define STUDYING	(1u<<11)
+#define REACHABLE	(1u<<12)
 
 static int tree_is_complete(const unsigned char *sha1)
 {
@@ -209,6 +211,43 @@ static int keep_entry(struct commit **it, unsigned char *sha1)
 	return 1;
 }
 
+static void mark_reachable(struct commit *commit, unsigned long expire_limit)
+{
+	/*
+	 * We need to compute if commit on either side of an reflog
+	 * entry is reachable from the tip of the ref for all entries.
+	 * Mark commits that are reachable from the tip down to the
+	 * time threashold first; we know a commit marked thusly is
+	 * reachable from the tip without running in_merge_bases()
+	 * at all.
+	 */
+	struct commit_list *pending = NULL;
+
+	commit_list_insert(commit, &pending);
+	while (pending) {
+		struct commit_list *entry = pending;
+		struct commit_list *parent;
+		pending = entry->next;
+		commit = entry->item;
+		free(entry);
+		if (commit->object.flags & REACHABLE)
+			continue;
+		commit->object.flags |= REACHABLE;
+		parent = commit->parents;
+		while (parent) {
+			commit = parent->item;
+			parent = parent->next;
+			if (commit->object.flags & REACHABLE)
+				continue;
+			if (parse_commit(commit))
+				continue;
+			if (commit->date < expire_limit)
+				continue;
+			commit_list_insert(commit, &pending);
+		}
+	}
+}
+
 static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 		const char *email, unsigned long timestamp, int tz,
 		const char *message, void *cb_data)
@@ -230,12 +269,28 @@ static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 	if (timestamp < cb->cmd->expire_unreachable) {
 		if (!cb->ref_commit)
 			goto prune;
+
 		if (!old && !is_null_sha1(osha1))
 			old = lookup_commit_reference_gently(osha1, 1);
 		if (!new && !is_null_sha1(nsha1))
 			new = lookup_commit_reference_gently(nsha1, 1);
-		if ((old && !in_merge_bases(old, &cb->ref_commit, 1)) ||
-		    (new && !in_merge_bases(new, &cb->ref_commit, 1)))
+
+		if (!cb->commit_marked) {
+			if (old) {
+				mark_reachable(cb->ref_commit, old->date);
+				cb->commit_marked = 1;
+			} else if (new) {
+				mark_reachable(cb->ref_commit, new->date);
+				cb->commit_marked = 1;
+			}
+		}
+
+		if ((old &&
+		     !(old->object.flags & REACHABLE) &&
+		     !in_merge_bases(old, &cb->ref_commit, 1)) ||
+		    (new &&
+		     !(new->object.flags & REACHABLE) &&
+		     !in_merge_bases(new, &cb->ref_commit, 1)))
 			goto prune;
 	}
 
@@ -289,6 +344,8 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused,
 	cb.ref = ref;
 	cb.cmd = cmd;
 	for_each_reflog_ent(ref, expire_reflog_ent, &cb);
+	if (cb.ref_commit && cb.commit_marked)
+		clear_commit_marks(cb.ref_commit, REACHABLE);
  finish:
 	if (cb.newlog) {
 		if (fclose(cb.newlog)) {

  reply	other threads:[~2009-03-31  5:25 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-31  1:43 "git reflog expire --all" very slow Linus Torvalds
2009-03-31  4:34 ` Junio C Hamano
2009-03-31  5:09   ` Linus Torvalds
2009-03-31  5:24     ` Junio C Hamano [this message]
2009-03-31  5:42       ` Linus Torvalds
2009-03-31  5:57         ` Junio C Hamano
2009-03-31  5:50       ` Junio C Hamano
2009-03-31  5:38     ` Linus Torvalds
2009-03-31  5:50       ` Linus Torvalds
2009-03-31  5:51         ` Linus Torvalds
2009-04-02  6:46           ` Junio C Hamano
2009-04-02 15:30             ` Linus Torvalds
2009-03-31  6:08         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vy6um6z9m.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=casey@nrlssc.navy.mil \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).