All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] Possible optimization for gitweb
@ 2006-12-19 20:54 Robert Fitzsimons
  2006-12-19 21:45 ` Jakub Narebski
  2006-12-19 22:10 ` Junio C Hamano
  0 siblings, 2 replies; 8+ messages in thread
From: Robert Fitzsimons @ 2006-12-19 20:54 UTC (permalink / raw)
  To: git

While looking at the gitweb source yesterday, I noticed a number of
similar expensive workflows used by a number of actions (summary,
shortlog, log, rss, atom, and history).

The current workflows are:
	get ~100 sha1's using rev-list
	foreach sha1
		get/parse 1 commit using rev-list
		output commit

The new workflows I'm proposing would be:
	get/parse ~100 commit's using rev-list
	foreach commit
		output commit

The following simplified commands gives an idea of the git only overhead
between these two workflows.

time \
for r in `git-rev-list --max-count=100 HEAD --` ; \
do git-rev-list --header --parents --max-count=1 $r -- ; \
done > /dev/null

real    0m0.490s
user    0m0.224s
sys     0m0.228s

time \
git-rev-list --header --parents --max-count=100 HEAD -- > /dev/null

real    0m0.058s
user    0m0.008s
sys     0m0.004s

There would seems to be a benefit from making the proposed change to
these workflows, when run on my machine against a clone of Linus's tree.

One issue with this change is that, gitweb is page orientated.  Page 0
shows the first 100 items from a given hash, page 1 uses the same given
hash but show 100 to 199 items, etc.  Using 'git-rev-list --header
--parents' and then throwing away most of the result is very wasteful.

So I'm suggesting we add a new option to git-rev-list which will only
start show results once its has iterated past a given number of items.
Using a caret or tilde doesn't seem to return the same result.

I've attached a discussion patch which adds a new option --start-count
to git-rev-list and changed the summary and showlog actions of gitweb to
use this new option.

I'm sure there are many improvements to this patch, comments?

Robert

-----


diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 4059894..a1e0ccc 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -1260,6 +1260,30 @@ sub parse_tag {
 	return %tag
 }
 
+sub parse_commits {
+	my $commit_id = shift;
+	my $start_count = shift;
+	my $max_count = shift;
+
+	my @cos;
+	my @commit_lines;
+
+	local $/ = "\0";
+	open my $fd, "-|", git_cmd(), "rev-list",
+		"--header", "--parents", "--start-count=$start_count", "--max-count=$max_count",
+		$commit_id, "--"
+		or return;
+	while (my $commit = <$fd>) {
+		@commit_lines = split '\n', $commit;
+		pop @commit_lines;
+		my %co = parse_commit(undef, \@commit_lines);
+		push @cos, \%co;
+	}
+	close $fd or return;
+
+	return @cos;
+}
+
 sub parse_commit {
 	my $commit_id = shift;
 	my $commit_text = shift;
@@ -2633,29 +2657,29 @@ sub git_project_list_body {
 
 sub git_shortlog_body {
 	# uses global variable $project
-	my ($revlist, $from, $to, $refs, $extra) = @_;
+	my ($commitlist, $from, $to, $refs, $extra) = @_;
 
 	$from = 0 unless defined $from;
-	$to = $#{$revlist} if (!defined $to || $#{$revlist} < $to);
+	$to = $#{$commitlist} if (!defined $to || $#{$commitlist} < $to);
 
 	print "<table class=\"shortlog\" cellspacing=\"0\">\n";
 	my $alternate = 1;
 	for (my $i = $from; $i <= $to; $i++) {
-		my $commit = $revlist->[$i];
+		my $co = $commitlist->[$i];
+		my $commit = $co->{'id'};
 		#my $ref = defined $refs ? format_ref_marker($refs, $commit) : '';
 		my $ref = format_ref_marker($refs, $commit);
-		my %co = parse_commit($commit);
 		if ($alternate) {
 			print "<tr class=\"dark\">\n";
 		} else {
 			print "<tr class=\"light\">\n";
 		}
 		$alternate ^= 1;
-		# git_summary() used print "<td><i>$co{'age_string'}</i></td>\n" .
-		print "<td title=\"$co{'age_string_age'}\"><i>$co{'age_string_date'}</i></td>\n" .
-		      "<td><i>" . esc_html(chop_str($co{'author_name'}, 10)) . "</i></td>\n" .
+		# git_summary() used print "<td><i>$co->{'age_string'}</i></td>\n" .
+		print "<td title=\"$co->{'age_string_age'}\"><i>$co->{'age_string_date'}</i></td>\n" .
+		      "<td><i>" . esc_html(chop_str($co->{'author_name'}, 10)) . "</i></td>\n" .
 		      "<td>";
-		print format_subject_html($co{'title'}, $co{'title_short'},
+		print format_subject_html($co->{'title'}, $co->{'title_short'},
 		                          href(action=>"commit", hash=>$commit), $ref);
 		print "</td>\n" .
 		      "<td class=\"link\">" .
@@ -2952,13 +2976,9 @@ sub git_summary {
 		}
 	}
 
-	open my $fd, "-|", git_cmd(), "rev-list", "--max-count=17",
-		git_get_head_hash($project), "--"
-		or die_error(undef, "Open git-rev-list failed");
-	my @revlist = map { chomp; $_ } <$fd>;
-	close $fd;
+	my @commitlist = parse_commits($head, 0, 17);
 	git_print_header_div('shortlog');
-	git_shortlog_body(\@revlist, 0, 15, $refs,
+	git_shortlog_body(\@commitlist, 0, 15, $refs,
 	                  $cgi->a({-href => href(action=>"shortlog")}, "..."));
 
 	if (@taglist) {
@@ -4313,15 +4333,12 @@ sub git_shortlog {
 	}
 	my $refs = git_get_references();
 
-	my $limit = sprintf("--max-count=%i", (100 * ($page+1)));
-	open my $fd, "-|", git_cmd(), "rev-list", $limit, $hash, "--"
-		or die_error(undef, "Open git-rev-list failed");
-	my @revlist = map { chomp; $_ } <$fd>;
-	close $fd;
+	my $max_count = (100 * ($page+1));
+	my @commitlist = parse_commits($hash, (100 * $page), $max_count);
 
-	my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $#revlist);
+	my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $max_count);
 	my $next_link = '';
-	if ($#revlist >= (100 * ($page+1)-1)) {
+	if ($max_count >= 100) {
 		$next_link =
 			$cgi->a({-href => href(action=>"shortlog", hash=>$hash, page=>$page+1),
 			         -title => "Alt-n"}, "next");
@@ -4332,7 +4349,7 @@ sub git_shortlog {
 	git_print_page_nav('shortlog','', $hash,$hash,$hash, $paging_nav);
 	git_print_header_div('summary', $project);
 
-	git_shortlog_body(\@revlist, ($page * 100), $#revlist, $refs, $next_link);
+	git_shortlog_body(\@commitlist, 0, $#commitlist, $refs, $next_link);
 
 	git_footer_html();
 }
diff --git a/list-objects.c b/list-objects.c
index f1fa21c..d96c8bf 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -108,8 +108,12 @@ void traverse_commit_list(struct rev_info *revs,
 	struct object_array objects = { 0, 0, NULL };
 
 	while ((commit = get_revision(revs)) != NULL) {
-		process_tree(revs, commit->tree, &objects, NULL, "");
-		show_commit(commit);
+		if (revs->start_count <= 0) {
+			process_tree(revs, commit->tree, &objects, NULL, "");
+			show_commit(commit);
+		} else {
+			revs->start_count--;
+		}
 	}
 	for (i = 0; i < revs->pending.nr; i++) {
 		struct object_array_entry *pending = revs->pending.objects + i;
diff --git a/revision.c b/revision.c
index 993bb66..3e3d929 100644
--- a/revision.c
+++ b/revision.c
@@ -524,6 +524,7 @@ void init_revisions(struct rev_info *revs, const char *prefix)
 	revs->prefix = prefix;
 	revs->max_age = -1;
 	revs->min_age = -1;
+	revs->start_count = -1;
 	revs->max_count = -1;
 
 	revs->prune_fn = NULL;
@@ -756,6 +757,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
 		const char *arg = argv[i];
 		if (*arg == '-') {
 			int opts;
+			if (!strncmp(arg, "--start-count=", 14)) {
+				revs->start_count = atoi(arg + 14);
+				continue;
+			}
 			if (!strncmp(arg, "--max-count=", 12)) {
 				revs->max_count = atoi(arg + 12);
 				continue;
diff --git a/revision.h b/revision.h
index 3adab95..c2dce8c 100644
--- a/revision.h
+++ b/revision.h
@@ -75,6 +75,7 @@ struct rev_info {
 	struct grep_opt	*grep_filter;
 
 	/* special limits */
+	int start_count;
 	int max_count;
 	unsigned long max_age;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-12-20 15:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-12-19 20:54 [RFC] Possible optimization for gitweb Robert Fitzsimons
2006-12-19 21:45 ` Jakub Narebski
2006-12-20  0:52   ` Robert Fitzsimons
2006-12-19 22:10 ` Junio C Hamano
2006-12-19 22:22   ` Jakub Narebski
2006-12-20  0:29     ` [PATCH] rev-list: Add a new option --skip Robert Fitzsimons
2006-12-20  1:09       ` Junio C Hamano
2006-12-20 14:59         ` [PATCH] rev-list: Document --skip and add test cases Robert Fitzsimons

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.