git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Brandon Casey <casey@nrlssc.navy.mil>,
	Johannes Schindelin <johannes.schindelin@gmx.de>,
	Git Mailing List <git@vger.kernel.org>
Subject: Re: "git reflog expire --all" very slow
Date: Mon, 30 Mar 2009 21:34:14 -0700	[thread overview]
Message-ID: <7vk5668g55.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <alpine.LFD.2.00.0903301803190.4093@localhost.localdomain> (Linus Torvalds's message of "Mon, 30 Mar 2009 18:43:59 -0700 (PDT)")

Linus Torvalds <torvalds@linux-foundation.org> writes:

> I have not checked if there is anything really obvious going on that could 
> change that whole logic that causes us to do merge-bases into something 
> saner, since the reflog code is not a part of git I'm familiar with. 

When we look at one reflog entry with old and new commit, we decide to
prune an old entry if either side is unreachable from the tip of the
branch:

	if (timestamp < cb->cmd->expire_unreachable) {
		if (!cb->ref_commit)
			goto prune;
		if (!old && !is_null_sha1(osha1))
			old = lookup_commit_reference_gently(osha1, 1);
		if (!new && !is_null_sha1(nsha1))
			new = lookup_commit_reference_gently(nsha1, 1);
		if ((old && !in_merge_bases(old, &cb->ref_commit, 1)) ||
		    (new && !in_merge_bases(new, &cb->ref_commit, 1)))
			goto prune;
	}

Most of your reflog entries are expected to be reachable from the tip, so
one optimization would be to mark all commits reachable from the tip
upfront, and omit the in_merge_bases() computation for the ones that are
already marked.  Perhaps something like this...

 builtin-reflog.c |   49 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/builtin-reflog.c b/builtin-reflog.c
index d95f515..f0d61bd 100644
--- a/builtin-reflog.c
+++ b/builtin-reflog.c
@@ -52,6 +52,7 @@ struct collect_reflog_cb {
 
 #define INCOMPLETE	(1u<<10)
 #define STUDYING	(1u<<11)
+#define REACHABLE	(1u<<12)
 
 static int tree_is_complete(const unsigned char *sha1)
 {
@@ -209,6 +210,43 @@ static int keep_entry(struct commit **it, unsigned char *sha1)
 	return 1;
 }
 
+static void mark_reachable(struct commit *commit, unsigned long expire_limit)
+{
+	/*
+	 * We need to compute if commit on either side of an reflog
+	 * entry is reachable from the tip of the ref for all entries.
+	 * Mark commits that are reachable from the tip down to the
+	 * time threashold first; we know a commit marked thusly is
+	 * reachable from the tip without running in_merge_bases()
+	 * at all.
+	 */
+	struct commit_list *pending = NULL;
+
+	commit_list_insert(commit, &pending);
+	while (pending) {
+		struct commit_list *entry = pending;
+		struct commit_list *parent;
+		pending = entry->next;
+		commit = entry->item;
+		free(entry);
+		if (commit->object.flags & REACHABLE)
+			continue;
+		commit->object.flags |= REACHABLE;
+		parent = commit->parents;
+		while (parent) {
+			commit = parent->item;
+			parent = parent->next;
+			if (commit->object.flags & REACHABLE)
+				continue;
+			if (parse_commit(commit))
+				continue;
+			if (commit->date < expire_limit)
+				continue;
+			commit_list_insert(commit, &pending);
+		}
+	}
+}
+
 static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 		const char *email, unsigned long timestamp, int tz,
 		const char *message, void *cb_data)
@@ -234,8 +272,12 @@ static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 			old = lookup_commit_reference_gently(osha1, 1);
 		if (!new && !is_null_sha1(nsha1))
 			new = lookup_commit_reference_gently(nsha1, 1);
-		if ((old && !in_merge_bases(old, &cb->ref_commit, 1)) ||
-		    (new && !in_merge_bases(new, &cb->ref_commit, 1)))
+		if ((old &&
+		     !(old->object.flags & REACHABLE) &&
+		     !in_merge_bases(old, &cb->ref_commit, 1)) ||
+		    (new &&
+		     !(new->object.flags & REACHABLE) &&
+		     !in_merge_bases(new, &cb->ref_commit, 1)))
 			goto prune;
 	}
 
@@ -288,7 +330,10 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused,
 	cb.ref_commit = lookup_commit_reference_gently(sha1, 1);
 	cb.ref = ref;
 	cb.cmd = cmd;
+
+	mark_reachable(cb.ref_commit, cmd->expire_unreachable);
 	for_each_reflog_ent(ref, expire_reflog_ent, &cb);
+	clear_commit_marks(cb.ref_commit, REACHABLE);
  finish:
 	if (cb.newlog) {
 		if (fclose(cb.newlog)) {

  reply	other threads:[~2009-03-31  4:35 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-31  1:43 "git reflog expire --all" very slow Linus Torvalds
2009-03-31  4:34 ` Junio C Hamano [this message]
2009-03-31  5:09   ` Linus Torvalds
2009-03-31  5:24     ` Junio C Hamano
2009-03-31  5:42       ` Linus Torvalds
2009-03-31  5:57         ` Junio C Hamano
2009-03-31  5:50       ` Junio C Hamano
2009-03-31  5:38     ` Linus Torvalds
2009-03-31  5:50       ` Linus Torvalds
2009-03-31  5:51         ` Linus Torvalds
2009-04-02  6:46           ` Junio C Hamano
2009-04-02 15:30             ` Linus Torvalds
2009-03-31  6:08         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vk5668g55.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=casey@nrlssc.navy.mil \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).