From: Jakob Stoklund Olesen <stoklund@2pi.dk>
To: git@vger.kernel.org
Cc: Eric Wong <normalperson@yhbt.net>, Sam Vilain <sam@vilain.net>,
Steven Walter <stevenrwalter@gmail.com>,
Peter Baumann <waste.manager@gmx.de>,
Andrew Myrick <amyrick@apple.com>,
Jakob Stoklund Olesen <stoklund@2pi.dk>
Subject: [PATCH 1/2] git-svn: only look at the new parts of svn:mergeinfo
Date: Wed, 16 Apr 2014 23:54:05 -0700
Message-ID: <1397717646-54248-1-git-send-email-stoklund@2pi.dk>
In a Subversion repository where many feature branches are merged into a
trunk, the svn:mergeinfo property can grow very large. This severely
slows down git-svn's make_log_entry() because it is checking all
mergeinfo entries every time the property changes.
In most cases, the additions to svn:mergeinfo since the last commit are
pretty small, and there is nothing to gain by checking merges that were
already checked for the last commit in the branch.
Add a mergeinfo_changes() function which computes the set of interesting
changes to svn:mergeinfo since the last commit. Filter out merged
branches whose ranges haven't changed, and remove a common prefix of
ranges from other merged branches.
This speeds up "git svn fetch" by several orders of magnitude on a large
repository where thousands of feature branches have been merged.
Signed-off-by: Jakob Stoklund Olesen <stoklund@2pi.dk>
---
perl/Git/SVN.pm | 84 ++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 72 insertions(+), 12 deletions(-)
diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index a59564f..d3785ab 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1178,7 +1178,7 @@ sub find_parent_branch {
or die "SVN connection failed somewhere...\n";
}
print STDERR "Successfully followed parent\n" unless $::_q > 1;
- return $self->make_log_entry($rev, [$parent], $ed);
+ return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
}
return undef;
}
@@ -1210,7 +1210,7 @@ sub do_fetch {
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
die "SVN connection failed somewhere...\n";
}
- $self->make_log_entry($rev, \@parents, $ed);
+ $self->make_log_entry($rev, \@parents, $ed, $last_rev);
}
sub mkemptydirs {
@@ -1478,9 +1478,9 @@ sub find_extra_svk_parents {
sub lookup_svn_merge {
my $uuid = shift;
my $url = shift;
- my $merge = shift;
+ my $source = shift;
+ my $revs = shift;
- my ($source, $revs) = split ":", $merge;
my $path = $source;
$path =~ s{^/}{};
my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@@ -1702,6 +1702,62 @@ sub parents_exclude {
return @excluded;
}
+# Compute what's new in svn:mergeinfo: returns a hashref of merge source => new tail of its range list, relative to the mergeinfo of $old_path at r$old_rev.
+sub mergeinfo_changes {
+ my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
+ my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
+ my $old_minfo = {};
+
+ # Initialize the mergeinfo caches stored on $self (both keyed by path) on the first call.
+ unless (defined $self->{cached_mergeinfo_rev}) {
+ $self->{cached_mergeinfo_rev} = {};
+ $self->{cached_mergeinfo} = {};
+ }
+
+ my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path};
+ if (defined $cached_rev && $cached_rev == $old_rev) {
+ $old_minfo = $self->{cached_mergeinfo}{$old_path};
+ } else {
+ my $ra = $self->ra;
+ # Give up (return an empty hashref) if $old_path isn't a directory at $old_rev.
+ # This is probably a merge on a subtree.
+ if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
+ warn "W: ignoring svn:mergeinfo on $old_path, ",
+ "directory didn't exist in r$old_rev\n";
+ return {};
+ }
+ my (undef, undef, $props) =
+ $self->ra->get_dir($old_path, $old_rev);
+ if (defined $props->{"svn:mergeinfo"}) {
+ my %omi = map {split ":", $_ } split "\n",
+ $props->{"svn:mergeinfo"};
+ $old_minfo = \%omi;
+ }
+ $self->{cached_mergeinfo}{$old_path} = $old_minfo;
+ $self->{cached_mergeinfo_rev}{$old_path} = $old_rev;
+ }
+
+ # Cache the new mergeinfo so a later call whose $old_path/$old_rev equal this $path/$rev can skip the get_dir lookup.
+ $self->{cached_mergeinfo}{$path} = \%minfo;
+ $self->{cached_mergeinfo_rev}{$path} = $rev;
+
+ my %changes = ();
+ foreach my $p (keys %minfo) {
+ my $a = $old_minfo->{$p} || "";
+ my $b = $minfo{$p};
+ # Omit merged branches whose range lists are unchanged.
+ next if $a eq $b;
+ # Remove any common range list prefix: the string XOR is \0 for every matching leading byte, $+[0] is the length of that run, and we keep what follows the last "," inside it.
+ ($a ^ $b) =~ /^[\0]*/;
+ my $common_prefix = rindex $b, ",", $+[0] - 1;
+ $changes{$p} = substr $b, $common_prefix + 1;
+ }
+ print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
+ scalar(keys %minfo), " sources, ",
+ scalar(keys %changes), " changed\n";
+
+ return \%changes;
+}
# note: this function should only be called if the various dirprops
# have actually changed
@@ -1715,14 +1771,15 @@ sub find_extra_svn_parents {
# history. Then, we figure out which git revisions are in
# that tip, but not this revision. If all of those revisions
# are now marked as merge, we can add the tip as a parent.
- my @merges = split "\n", $mergeinfo;
+ my @merges = sort keys %$mergeinfo;
my @merge_tips;
my $url = $self->url;
my $uuid = $self->ra_uuid;
my @all_ranges;
for my $merge ( @merges ) {
my ($tip_commit, @ranges) =
- lookup_svn_merge( $uuid, $url, $merge );
+ lookup_svn_merge( $uuid, $url,
+ $merge, $mergeinfo->{$merge} );
unless (!$tip_commit or
grep { $_ eq $tip_commit } @$parents ) {
push @merge_tips, $tip_commit;
@@ -1738,8 +1795,9 @@ sub find_extra_svn_parents {
# check merge tips for new parents
my @new_parents;
for my $merge_tip ( @merge_tips ) {
- my $spec = shift @merges;
+ my $merge = shift @merges;
next unless $merge_tip and $excluded{$merge_tip};
+ my $spec = "$merge:$mergeinfo->{$merge}";
# check out 'new' tips
my $merge_base;
@@ -1770,7 +1828,7 @@ sub find_extra_svn_parents {
.@incomplete." commit(s) (eg $incomplete[0])\n";
} else {
warn
- "Found merge parent (svn:mergeinfo prop): ",
+ "Found merge parent ($spec): ",
$merge_tip, "\n";
push @new_parents, $merge_tip;
}
@@ -1797,7 +1855,7 @@ sub find_extra_svn_parents {
}
sub make_log_entry {
- my ($self, $rev, $parents, $ed) = @_;
+ my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
my $untracked = $self->get_untracked($ed);
my @parents = @$parents;
@@ -1809,10 +1867,12 @@ sub make_log_entry {
($ed, $props->{"svk:merge"}, \@parents);
}
if ( $props->{"svn:mergeinfo"} ) {
+ my $mi_changes = $self->mergeinfo_changes
+ ($parent_path || $path, $parent_rev,
+ $path, $rev,
+ $props->{"svn:mergeinfo"});
$self->find_extra_svn_parents
- ($ed,
- $props->{"svn:mergeinfo"},
- \@parents);
+ ($ed, $mi_changes, \@parents);
}
}
--
1.8.5.2 (Apple Git-48)
next reply other threads:[~2014-04-17 6:55 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-17 6:54 Jakob Stoklund Olesen [this message]
2014-04-17 6:54 ` [PATCH 2/2] git-svn: only look at the root path for svn:mergeinfo Jakob Stoklund Olesen
2014-04-22 18:54 ` Eric Wong
2014-04-27 19:00 ` Jakob Stoklund Olesen
2014-04-22 18:47 ` [PATCH 1/2] git-svn: only look at the new parts of svn:mergeinfo Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1397717646-54248-1-git-send-email-stoklund@2pi.dk \
--to=stoklund@2pi.dk \
--cc=amyrick@apple.com \
--cc=git@vger.kernel.org \
--cc=normalperson@yhbt.net \
--cc=sam@vilain.net \
--cc=stevenrwalter@gmail.com \
--cc=waste.manager@gmx.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).