All of lore.kernel.org
 help / color / mirror / Atom feed
From: Robert Fitzsimons <robfitz@273k.net>
To: git@vger.kernel.org
Subject: [RFC] Possible optimization for gitweb
Date: Tue, 19 Dec 2006 20:54:22 +0000	[thread overview]
Message-ID: <20061219205422.GA17864@localhost> (raw)

While looking at the gitweb source yesterday, I noticed a number of
similar expensive workflows used by a number of actions (summary,
shortlog, log, rss, atom, and history).

The current workflows are:
	get ~100 sha1's using rev-list
	foreach sha1
		get/parse 1 commit using rev-list
		output commit

The new workflows I'm proposing would be:
	get/parse ~100 commit's using rev-list
	foreach commit
		output commit

The following simplified commands gives an idea of the git only overhead
between these two workflows.

time \
for r in `git-rev-list --max-count=100 HEAD --` ; \
do git-rev-list --header --parents --max-count=1 $r -- ; \
done > /dev/null

real    0m0.490s
user    0m0.224s
sys     0m0.228s

time \
git-rev-list --header --parents --max-count=100 HEAD -- > /dev/null

real    0m0.058s
user    0m0.008s
sys     0m0.004s

There would seems to be a benefit from making the proposed change to
these workflows, when run on my machine against a clone of Linus's tree.

One issue with this change is that, gitweb is page orientated.  Page 0
shows the first 100 items from a given hash, page 1 uses the same given
hash but show 100 to 199 items, etc.  Using 'git-rev-list --header
--parents' and then throwing away most of the result is very wasteful.

So I'm suggesting we add a new option to git-rev-list which will only
start show results once its has iterated past a given number of items.
Using a caret or tilde doesn't seem to return the same result.

I've attached a discussion patch which adds a new option --start-count
to git-rev-list and changed the summary and showlog actions of gitweb to
use this new option.

I'm sure there are many improvements to this patch, comments?

Robert

-----


diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 4059894..a1e0ccc 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -1260,6 +1260,30 @@ sub parse_tag {
 	return %tag
 }
 
+sub parse_commits {
+	my $commit_id = shift;
+	my $start_count = shift;
+	my $max_count = shift;
+
+	my @cos;
+	my @commit_lines;
+
+	local $/ = "\0";
+	open my $fd, "-|", git_cmd(), "rev-list",
+		"--header", "--parents", "--start-count=$start_count", "--max-count=$max_count",
+		$commit_id, "--"
+		or return;
+	while (my $commit = <$fd>) {
+		@commit_lines = split '\n', $commit;
+		pop @commit_lines;
+		my %co = parse_commit(undef, \@commit_lines);
+		push @cos, \%co;
+	}
+	close $fd or return;
+
+	return @cos;
+}
+
 sub parse_commit {
 	my $commit_id = shift;
 	my $commit_text = shift;
@@ -2633,29 +2657,29 @@ sub git_project_list_body {
 
 sub git_shortlog_body {
 	# uses global variable $project
-	my ($revlist, $from, $to, $refs, $extra) = @_;
+	my ($commitlist, $from, $to, $refs, $extra) = @_;
 
 	$from = 0 unless defined $from;
-	$to = $#{$revlist} if (!defined $to || $#{$revlist} < $to);
+	$to = $#{$commitlist} if (!defined $to || $#{$commitlist} < $to);
 
 	print "<table class=\"shortlog\" cellspacing=\"0\">\n";
 	my $alternate = 1;
 	for (my $i = $from; $i <= $to; $i++) {
-		my $commit = $revlist->[$i];
+		my $co = $commitlist->[$i];
+		my $commit = $co->{'id'};
 		#my $ref = defined $refs ? format_ref_marker($refs, $commit) : '';
 		my $ref = format_ref_marker($refs, $commit);
-		my %co = parse_commit($commit);
 		if ($alternate) {
 			print "<tr class=\"dark\">\n";
 		} else {
 			print "<tr class=\"light\">\n";
 		}
 		$alternate ^= 1;
-		# git_summary() used print "<td><i>$co{'age_string'}</i></td>\n" .
-		print "<td title=\"$co{'age_string_age'}\"><i>$co{'age_string_date'}</i></td>\n" .
-		      "<td><i>" . esc_html(chop_str($co{'author_name'}, 10)) . "</i></td>\n" .
+		# git_summary() used print "<td><i>$co->{'age_string'}</i></td>\n" .
+		print "<td title=\"$co->{'age_string_age'}\"><i>$co->{'age_string_date'}</i></td>\n" .
+		      "<td><i>" . esc_html(chop_str($co->{'author_name'}, 10)) . "</i></td>\n" .
 		      "<td>";
-		print format_subject_html($co{'title'}, $co{'title_short'},
+		print format_subject_html($co->{'title'}, $co->{'title_short'},
 		                          href(action=>"commit", hash=>$commit), $ref);
 		print "</td>\n" .
 		      "<td class=\"link\">" .
@@ -2952,13 +2976,9 @@ sub git_summary {
 		}
 	}
 
-	open my $fd, "-|", git_cmd(), "rev-list", "--max-count=17",
-		git_get_head_hash($project), "--"
-		or die_error(undef, "Open git-rev-list failed");
-	my @revlist = map { chomp; $_ } <$fd>;
-	close $fd;
+	my @commitlist = parse_commits($head, 0, 17);
 	git_print_header_div('shortlog');
-	git_shortlog_body(\@revlist, 0, 15, $refs,
+	git_shortlog_body(\@commitlist, 0, 15, $refs,
 	                  $cgi->a({-href => href(action=>"shortlog")}, "..."));
 
 	if (@taglist) {
@@ -4313,15 +4333,12 @@ sub git_shortlog {
 	}
 	my $refs = git_get_references();
 
-	my $limit = sprintf("--max-count=%i", (100 * ($page+1)));
-	open my $fd, "-|", git_cmd(), "rev-list", $limit, $hash, "--"
-		or die_error(undef, "Open git-rev-list failed");
-	my @revlist = map { chomp; $_ } <$fd>;
-	close $fd;
+	my $max_count = (100 * ($page+1));
+	my @commitlist = parse_commits($hash, (100 * $page), $max_count);
 
-	my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $#revlist);
+	my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $max_count);
 	my $next_link = '';
-	if ($#revlist >= (100 * ($page+1)-1)) {
+	if ($max_count >= 100) {
 		$next_link =
 			$cgi->a({-href => href(action=>"shortlog", hash=>$hash, page=>$page+1),
 			         -title => "Alt-n"}, "next");
@@ -4332,7 +4349,7 @@ sub git_shortlog {
 	git_print_page_nav('shortlog','', $hash,$hash,$hash, $paging_nav);
 	git_print_header_div('summary', $project);
 
-	git_shortlog_body(\@revlist, ($page * 100), $#revlist, $refs, $next_link);
+	git_shortlog_body(\@commitlist, 0, $#commitlist, $refs, $next_link);
 
 	git_footer_html();
 }
diff --git a/list-objects.c b/list-objects.c
index f1fa21c..d96c8bf 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -108,8 +108,12 @@ void traverse_commit_list(struct rev_info *revs,
 	struct object_array objects = { 0, 0, NULL };
 
 	while ((commit = get_revision(revs)) != NULL) {
-		process_tree(revs, commit->tree, &objects, NULL, "");
-		show_commit(commit);
+		if (revs->start_count <= 0) {
+			process_tree(revs, commit->tree, &objects, NULL, "");
+			show_commit(commit);
+		} else {
+			revs->start_count--;
+		}
 	}
 	for (i = 0; i < revs->pending.nr; i++) {
 		struct object_array_entry *pending = revs->pending.objects + i;
diff --git a/revision.c b/revision.c
index 993bb66..3e3d929 100644
--- a/revision.c
+++ b/revision.c
@@ -524,6 +524,7 @@ void init_revisions(struct rev_info *revs, const char *prefix)
 	revs->prefix = prefix;
 	revs->max_age = -1;
 	revs->min_age = -1;
+	revs->start_count = -1;
 	revs->max_count = -1;
 
 	revs->prune_fn = NULL;
@@ -756,6 +757,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
 		const char *arg = argv[i];
 		if (*arg == '-') {
 			int opts;
+			if (!strncmp(arg, "--start-count=", 14)) {
+				revs->start_count = atoi(arg + 14);
+				continue;
+			}
 			if (!strncmp(arg, "--max-count=", 12)) {
 				revs->max_count = atoi(arg + 12);
 				continue;
diff --git a/revision.h b/revision.h
index 3adab95..c2dce8c 100644
--- a/revision.h
+++ b/revision.h
@@ -75,6 +75,7 @@ struct rev_info {
 	struct grep_opt	*grep_filter;
 
 	/* special limits */
+	int start_count;
 	int max_count;
 	unsigned long max_age;

             reply	other threads:[~2006-12-19 20:54 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-12-19 20:54 Robert Fitzsimons [this message]
2006-12-19 21:45 ` [RFC] Possible optimization for gitweb Jakub Narebski
2006-12-20  0:52   ` Robert Fitzsimons
2006-12-19 22:10 ` Junio C Hamano
2006-12-19 22:22   ` Jakub Narebski
2006-12-20  0:29     ` [PATCH] rev-list: Add a new option --skip Robert Fitzsimons
2006-12-20  1:09       ` Junio C Hamano
2006-12-20 14:59         ` [PATCH] rev-list: Document --skip and add test cases Robert Fitzsimons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061219205422.GA17864@localhost \
    --to=robfitz@273k.net \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.