From: Robert Fitzsimons <robfitz@273k.net>
To: git@vger.kernel.org
Subject: [RFC] Possible optimization for gitweb
Date: Tue, 19 Dec 2006 20:54:22 +0000 [thread overview]
Message-ID: <20061219205422.GA17864@localhost> (raw)
While looking at the gitweb source yesterday, I noticed a number of
similar expensive workflows used by a number of actions (summary,
shortlog, log, rss, atom, and history).
The current workflows are:
get ~100 sha1's using rev-list
foreach sha1
get/parse 1 commit using rev-list
output commit
The new workflows I'm proposing would be:
get/parse ~100 commit's using rev-list
foreach commit
output commit
The following simplified commands gives an idea of the git only overhead
between these two workflows.
time \
for r in `git-rev-list --max-count=100 HEAD --` ; \
do git-rev-list --header --parents --max-count=1 $r -- ; \
done > /dev/null
real 0m0.490s
user 0m0.224s
sys 0m0.228s
time \
git-rev-list --header --parents --max-count=100 HEAD -- > /dev/null
real 0m0.058s
user 0m0.008s
sys 0m0.004s
There would seems to be a benefit from making the proposed change to
these workflows, when run on my machine against a clone of Linus's tree.
One issue with this change is that, gitweb is page orientated. Page 0
shows the first 100 items from a given hash, page 1 uses the same given
hash but show 100 to 199 items, etc. Using 'git-rev-list --header
--parents' and then throwing away most of the result is very wasteful.
So I'm suggesting we add a new option to git-rev-list which will only
start show results once its has iterated past a given number of items.
Using a caret or tilde doesn't seem to return the same result.
I've attached a discussion patch which adds a new option --start-count
to git-rev-list and changed the summary and showlog actions of gitweb to
use this new option.
I'm sure there are many improvements to this patch, comments?
Robert
-----
diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 4059894..a1e0ccc 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -1260,6 +1260,30 @@ sub parse_tag {
return %tag
}
+sub parse_commits {
+ my $commit_id = shift;
+ my $start_count = shift;
+ my $max_count = shift;
+
+ my @cos;
+ my @commit_lines;
+
+ local $/ = "\0";
+ open my $fd, "-|", git_cmd(), "rev-list",
+ "--header", "--parents", "--start-count=$start_count", "--max-count=$max_count",
+ $commit_id, "--"
+ or return;
+ while (my $commit = <$fd>) {
+ @commit_lines = split '\n', $commit;
+ pop @commit_lines;
+ my %co = parse_commit(undef, \@commit_lines);
+ push @cos, \%co;
+ }
+ close $fd or return;
+
+ return @cos;
+}
+
sub parse_commit {
my $commit_id = shift;
my $commit_text = shift;
@@ -2633,29 +2657,29 @@ sub git_project_list_body {
sub git_shortlog_body {
# uses global variable $project
- my ($revlist, $from, $to, $refs, $extra) = @_;
+ my ($commitlist, $from, $to, $refs, $extra) = @_;
$from = 0 unless defined $from;
- $to = $#{$revlist} if (!defined $to || $#{$revlist} < $to);
+ $to = $#{$commitlist} if (!defined $to || $#{$commitlist} < $to);
print "<table class=\"shortlog\" cellspacing=\"0\">\n";
my $alternate = 1;
for (my $i = $from; $i <= $to; $i++) {
- my $commit = $revlist->[$i];
+ my $co = $commitlist->[$i];
+ my $commit = $co->{'id'};
#my $ref = defined $refs ? format_ref_marker($refs, $commit) : '';
my $ref = format_ref_marker($refs, $commit);
- my %co = parse_commit($commit);
if ($alternate) {
print "<tr class=\"dark\">\n";
} else {
print "<tr class=\"light\">\n";
}
$alternate ^= 1;
- # git_summary() used print "<td><i>$co{'age_string'}</i></td>\n" .
- print "<td title=\"$co{'age_string_age'}\"><i>$co{'age_string_date'}</i></td>\n" .
- "<td><i>" . esc_html(chop_str($co{'author_name'}, 10)) . "</i></td>\n" .
+ # git_summary() used print "<td><i>$co->{'age_string'}</i></td>\n" .
+ print "<td title=\"$co->{'age_string_age'}\"><i>$co->{'age_string_date'}</i></td>\n" .
+ "<td><i>" . esc_html(chop_str($co->{'author_name'}, 10)) . "</i></td>\n" .
"<td>";
- print format_subject_html($co{'title'}, $co{'title_short'},
+ print format_subject_html($co->{'title'}, $co->{'title_short'},
href(action=>"commit", hash=>$commit), $ref);
print "</td>\n" .
"<td class=\"link\">" .
@@ -2952,13 +2976,9 @@ sub git_summary {
}
}
- open my $fd, "-|", git_cmd(), "rev-list", "--max-count=17",
- git_get_head_hash($project), "--"
- or die_error(undef, "Open git-rev-list failed");
- my @revlist = map { chomp; $_ } <$fd>;
- close $fd;
+ my @commitlist = parse_commits($head, 0, 17);
git_print_header_div('shortlog');
- git_shortlog_body(\@revlist, 0, 15, $refs,
+ git_shortlog_body(\@commitlist, 0, 15, $refs,
$cgi->a({-href => href(action=>"shortlog")}, "..."));
if (@taglist) {
@@ -4313,15 +4333,12 @@ sub git_shortlog {
}
my $refs = git_get_references();
- my $limit = sprintf("--max-count=%i", (100 * ($page+1)));
- open my $fd, "-|", git_cmd(), "rev-list", $limit, $hash, "--"
- or die_error(undef, "Open git-rev-list failed");
- my @revlist = map { chomp; $_ } <$fd>;
- close $fd;
+ my $max_count = (100 * ($page+1));
+ my @commitlist = parse_commits($hash, (100 * $page), $max_count);
- my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $#revlist);
+ my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $max_count);
my $next_link = '';
- if ($#revlist >= (100 * ($page+1)-1)) {
+ if ($max_count >= 100) {
$next_link =
$cgi->a({-href => href(action=>"shortlog", hash=>$hash, page=>$page+1),
-title => "Alt-n"}, "next");
@@ -4332,7 +4349,7 @@ sub git_shortlog {
git_print_page_nav('shortlog','', $hash,$hash,$hash, $paging_nav);
git_print_header_div('summary', $project);
- git_shortlog_body(\@revlist, ($page * 100), $#revlist, $refs, $next_link);
+ git_shortlog_body(\@commitlist, 0, $#commitlist, $refs, $next_link);
git_footer_html();
}
diff --git a/list-objects.c b/list-objects.c
index f1fa21c..d96c8bf 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -108,8 +108,12 @@ void traverse_commit_list(struct rev_info *revs,
struct object_array objects = { 0, 0, NULL };
while ((commit = get_revision(revs)) != NULL) {
- process_tree(revs, commit->tree, &objects, NULL, "");
- show_commit(commit);
+ if (revs->start_count <= 0) {
+ process_tree(revs, commit->tree, &objects, NULL, "");
+ show_commit(commit);
+ } else {
+ revs->start_count--;
+ }
}
for (i = 0; i < revs->pending.nr; i++) {
struct object_array_entry *pending = revs->pending.objects + i;
diff --git a/revision.c b/revision.c
index 993bb66..3e3d929 100644
--- a/revision.c
+++ b/revision.c
@@ -524,6 +524,7 @@ void init_revisions(struct rev_info *revs, const char *prefix)
revs->prefix = prefix;
revs->max_age = -1;
revs->min_age = -1;
+ revs->start_count = -1;
revs->max_count = -1;
revs->prune_fn = NULL;
@@ -756,6 +757,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
const char *arg = argv[i];
if (*arg == '-') {
int opts;
+ if (!strncmp(arg, "--start-count=", 14)) {
+ revs->start_count = atoi(arg + 14);
+ continue;
+ }
if (!strncmp(arg, "--max-count=", 12)) {
revs->max_count = atoi(arg + 12);
continue;
diff --git a/revision.h b/revision.h
index 3adab95..c2dce8c 100644
--- a/revision.h
+++ b/revision.h
@@ -75,6 +75,7 @@ struct rev_info {
struct grep_opt *grep_filter;
/* special limits */
+ int start_count;
int max_count;
unsigned long max_age;
next reply other threads:[~2006-12-19 20:54 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-12-19 20:54 Robert Fitzsimons [this message]
2006-12-19 21:45 ` [RFC] Possible optimization for gitweb Jakub Narebski
2006-12-20 0:52 ` Robert Fitzsimons
2006-12-19 22:10 ` Junio C Hamano
2006-12-19 22:22 ` Jakub Narebski
2006-12-20 0:29 ` [PATCH] rev-list: Add a new option --skip Robert Fitzsimons
2006-12-20 1:09 ` Junio C Hamano
2006-12-20 14:59 ` [PATCH] rev-list: Document --skip and add test cases Robert Fitzsimons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061219205422.GA17864@localhost \
--to=robfitz@273k.net \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).