From: Eric Wong <normalperson@yhbt.net>
To: Junio C Hamano <junkio@cox.net>
Cc: git@vger.kernel.org, Pazu <pazu@pazu.com.br>
Subject: [PATCH 1/2] git-svn: enable delta transfers during fetches when using SVN:: libs
Date: Mon, 27 Nov 2006 21:44:48 -0800 [thread overview]
Message-ID: <20061128054448.GA396@soma> (raw)
In-Reply-To: <loom.20061124T143148-286@post.gmane.org>
This should drastically reduce bandwidth used for network
transfers. This is not enabled for file:// repositories by
default because of the increased CPU usage and I/O needed.
GIT_SVN_DELTA_FETCH may be set to a true value to enable or
false (0) to disable delta transfers regardless of the
repository type.
Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
git-svn.perl | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 189 insertions(+), 5 deletions(-)
diff --git a/git-svn.perl b/git-svn.perl
index d5d9c49..9b86d91 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -68,7 +68,7 @@ my ($_revision,$_stdin,$_no_ignore_ext,$
$_limit, $_verbose, $_incremental, $_oneline, $_l_fmt, $_show_commit,
$_version, $_upgrade, $_authors, $_branch_all_refs, @_opt_m,
$_merge, $_strategy, $_dry_run, $_ignore_nodate, $_non_recursive,
- $_username, $_config_dir, $_no_auth_cache);
+ $_username, $_config_dir, $_no_auth_cache, $_xfer_delta);
my (@_branch_from, %tree_map, %users, %rusers, %equiv);
my ($_svn_co_url_revs, $_svn_pg_peg_revs);
my @repo_path_split_cache;
@@ -2675,6 +2675,9 @@ sub libsvn_load {
require SVN::Ra;
require SVN::Delta;
push @SVN::Git::Editor::ISA, 'SVN::Delta::Editor';
+ push @SVN::Git::Fetcher::ISA, 'SVN::Delta::Editor';
+ *SVN::Git::Fetcher::process_rm = *process_rm;
+ *SVN::Git::Fetcher::safe_qx = *safe_qx;
my $kill_stupid_warnings = $SVN::Node::none.$SVN::Node::file.
$SVN::Node::dir.$SVN::Node::unknown.
$SVN::Node::none.$SVN::Node::file.
@@ -2827,6 +2830,13 @@ sub libsvn_connect {
config => $config,
pool => SVN::Pool->new,
auth_provider_callbacks => $callbacks);
+
+ my $df = $ENV{GIT_SVN_DELTA_FETCH};
+ if (defined $df) {
+ $_xfer_delta = $df;
+ } else {
+ $_xfer_delta = ($url =~ m#^file://#) ? undef : 1;
+ }
$ra->{svn_path} = $url;
$ra->{repos_root} = $ra->get_repos_root;
$ra->{svn_path} =~ s#^\Q$ra->{repos_root}\E/*##;
@@ -2915,6 +2925,24 @@ sub process_rm {
}
sub libsvn_fetch {
+ $_xfer_delta ? libsvn_fetch_delta(@_) : libsvn_fetch_full(@_);
+}
+
+sub libsvn_fetch_delta {
+ my ($last_commit, $paths, $rev, $author, $date, $msg) = @_;
+ my $pool = SVN::Pool->new;
+ my $ed = SVN::Git::Fetcher->new({ c => $last_commit, ra => $SVN,
+ paths => $paths });
+ my $reporter = $SVN->do_update($rev, '', 1, $ed, $pool);
+ my @lock = $SVN::Core::VERSION ge '1.2.0' ? (undef) : ();
+ my (undef, $last_rev, undef) = cmt_metadata($last_commit);
+ $reporter->set_path('', $last_rev, 0, @lock, $pool);
+ $reporter->finish_report($pool);
+ $pool->clear;
+ libsvn_log_entry($rev, $author, $date, $msg, [$last_commit]);
+}
+
+sub libsvn_fetch_full {
my ($last_commit, $paths, $rev, $author, $date, $msg) = @_;
open my $gui, '| git-update-index -z --index-info' or croak $!;
my @amr;
@@ -3133,7 +3161,11 @@ sub libsvn_find_parent_branch {
unlink $GIT_SVN_INDEX;
print STDERR "Found branch parent: ($GIT_SVN) $parent\n";
sys(qw/git-read-tree/, $parent);
- return libsvn_fetch($parent, $paths, $rev,
+ # I can't seem to get do_switch() to work correctly with
+ # the SWIG interface (TypeError when passing switch_url...),
+ # so we'll unconditionally bypass the delta interface here
+ # for now
+ return libsvn_fetch_full($parent, $paths, $rev,
$author, $date, $msg);
}
print STDERR "Nope, branch point not imported or unknown\n";
@@ -3153,9 +3185,19 @@ sub libsvn_new_tree {
return $log_entry;
}
my ($paths, $rev, $author, $date, $msg) = @_;
- open my $gui, '| git-update-index -z --index-info' or croak $!;
- libsvn_traverse($gui, '', $SVN->{svn_path}, $rev);
- close $gui or croak $?;
+ if ($_xfer_delta) {
+ my $pool = SVN::Pool->new;
+ my $ed = SVN::Git::Fetcher->new({paths => $paths, ra => $SVN});
+ my $reporter = $SVN->do_update($rev, '', 1, $ed, $pool);
+ my @lock = $SVN::Core::VERSION ge '1.2.0' ? (undef) : ();
+ $reporter->set_path('', $rev, 1, @lock, $pool);
+ $reporter->finish_report($pool);
+ $pool->clear;
+ } else {
+ open my $gui, '| git-update-index -z --index-info' or croak $!;
+ libsvn_traverse($gui, '', $SVN->{svn_path}, $rev);
+ close $gui or croak $?;
+ }
return libsvn_log_entry($rev, $author, $date, $msg);
}
@@ -3325,6 +3367,148 @@ sub copy_remote_ref {
"refs/remotes/$GIT_SVN on $origin\n";
}
}
+package SVN::Git::Fetcher;
+use vars qw/@ISA/;
+use strict;
+use warnings;
+use Carp qw/croak/;
+use IO::File qw//;
+
+# file baton members: path, mode_a, mode_b, pool, fh, blob, base
+sub new {
+ my ($class, $git_svn) = @_;
+ my $self = SVN::Delta::Editor->new;
+ bless $self, $class;
+ open my $gui, '| git-update-index -z --index-info' or croak $!;
+ $self->{gui} = $gui;
+ $self->{c} = $git_svn->{c} if exists $git_svn->{c};
+ if (my $p = $git_svn->{paths} && $git_svn->{ra}) {
+ my $s = $git_svn->{ra}->{svn_path};
+ $s = length $s ? qr#^/\Q$s\E/# : qr#^/#;
+ $self->{paths} = { map { my $x = $_;
+ $x =~ s/$s//;
+ $x => $p->{$_} } keys %$p };
+ }
+ require Digest::MD5;
+ $self;
+}
+
+sub delete_entry {
+ my ($self, $path, $rev, $pb) = @_;
+ process_rm($self->{gui}, $self->{c}, $path);
+ undef;
+}
+
+sub open_file {
+ my ($self, $path, $pb, $rev) = @_;
+ my ($mode, $blob) = (safe_qx('git-ls-tree',$self->{c},'--',$path)
+ =~ /^(\d{6}) blob ([a-f\d]{40})\t/);
+ { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
+ pool => SVN::Pool->new };
+}
+
+sub add_file {
+ my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
+ { path => $path, mode_a => 100644, mode_b => 100644,
+ pool => SVN::Pool->new };
+}
+
+sub change_file_prop {
+ my ($self, $fb, $prop, $value) = @_;
+ if ($prop eq 'svn:executable') {
+ if ($fb->{mode_b} != 120000) {
+ $fb->{mode_b} = defined $value ? 100755 : 100644;
+ }
+ } elsif ($prop eq 'svn:special') {
+ $fb->{mode_b} = defined $value ? 120000 : 100644;
+ }
+ undef;
+}
+
+sub apply_textdelta {
+ my ($self, $fb, $exp) = @_;
+ my $fh = IO::File->new_tmpfile;
+ $fh->autoflush(1);
+ # $fh gets auto-closed() by SVN::TxDelta::apply(),
+ # (but $base does not,) so dup() it for reading in close_file
+ open my $dup, '<&', $fh or croak $!;
+ my $base = IO::File->new_tmpfile;
+ $base->autoflush(1);
+ if ($fb->{blob}) {
+ defined (my $pid = fork) or croak $!;
+ if (!$pid) {
+ open STDOUT, '>&', $base or croak $!;
+ print STDOUT 'link ' if ($fb->{mode_a} == 120000);
+ exec qw/git-cat-file blob/, $fb->{blob} or croak $!;
+ }
+ waitpid $pid, 0;
+ croak $? if $?;
+
+ if (defined $exp) {
+ seek $base, 0, 0 or croak $!;
+ my $md5 = Digest::MD5->new;
+ $md5->addfile($base);
+ my $got = $md5->hexdigest;
+ die "Checksum mismatch: $fb->{path} $fb->{blob}\n",
+ "expected: $exp\n",
+ " got: $got\n" if ($got ne $exp);
+ }
+ }
+ seek $base, 0, 0 or croak $!;
+ $fb->{fh} = $dup;
+ $fb->{base} = $base;
+ [ SVN::TxDelta::apply($base, $fh, undef, $fb->{path}, $fb->{pool}) ];
+}
+
+sub close_file {
+ my ($self, $fb, $exp) = @_;
+ my $hash;
+ my $path = $fb->{path};
+ if (my $fh = $fb->{fh}) {
+ seek($fh, 0, 0) or croak $!;
+ my $md5 = Digest::MD5->new;
+ $md5->addfile($fh);
+ my $got = $md5->hexdigest;
+ die "Checksum mismatch: $path\n",
+ "expected: $exp\n got: $got\n" if ($got ne $exp);
+ seek($fh, 0, 0) or croak $!;
+ if ($fb->{mode_b} == 120000) {
+ read($fh, my $buf, 5) == 5 or croak $!;
+ $buf eq 'link ' or die "$path has mode 120000",
+ "but is not a link\n";
+ }
+ defined(my $pid = open my $out,'-|') or die "Can't fork: $!\n";
+ if (!$pid) {
+ open STDIN, '<&', $fh or croak $!;
+ exec qw/git-hash-object -w --stdin/ or croak $!;
+ }
+ chomp($hash = do { local $/; <$out> });
+ close $out or croak $!;
+ close $fh or croak $!;
+ $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
+ close $fb->{base} or croak $!;
+ } else {
+ $hash = $fb->{blob} or die "no blob information\n";
+ }
+ $fb->{pool}->clear;
+ my $gui = $self->{gui};
+ print $gui "$fb->{mode_b} $hash\t$path\0" or croak $!;
+ print "\t", $self->{paths}->{$path}->action,
+ "\t$path\n" if defined $self->{paths}->{$path};
+ undef;
+}
+
+sub abort_edit {
+ my $self = shift;
+ close $self->{gui};
+ $self->SUPER::abort_edit(@_);
+}
+
+sub close_edit {
+ my $self = shift;
+ close $self->{gui} or croak;
+ $self->SUPER::close_edit(@_);
+}
package SVN::Git::Editor;
use vars qw/@ISA/;
--
1.4.4.1.g22a08
next prev parent reply other threads:[~2006-11-28 5:44 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-24 13:36 git-svn: why fetching files is so slow Pazu
2006-11-24 17:10 ` Seth Falcon
2006-11-24 19:16 ` Eric Wong
2006-11-24 19:28 ` Pazu
2006-11-24 20:33 ` Eric Wong
2006-11-24 20:42 ` Junio C Hamano
2006-11-24 22:14 ` Eric Wong
2006-11-28 5:44 ` Eric Wong [this message]
2006-11-28 10:29 ` [PATCH 1.1/2] git-svn: fix output reporting from the delta fetcher Eric Wong
2006-11-28 10:50 ` [PATCH 1.2/2 (fixed)] " Eric Wong
2006-11-28 12:45 ` Pazu
2006-11-28 15:32 ` Pazu
2006-11-28 16:07 ` Seth Falcon
2006-11-28 16:56 ` Pazu
2006-11-28 20:16 ` Eric Wong
2006-11-28 20:47 ` Pazu
2006-11-28 21:15 ` Eric Wong
2006-11-29 16:15 ` git-svn and empty directories in svn (was: [PATCH 1.2/2 (fixed)] git-svn: fix output reporting from the delta fetcher) Seth Falcon
2006-12-03 1:47 ` Eric Wong
2006-12-03 4:08 ` git-svn and empty directories in svn Steven Grimm
2006-12-04 21:33 ` Eric Wong
2006-11-28 5:46 ` [PATCH 2/2] git-svn: update tests for recent changes Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061128054448.GA396@soma \
--to=normalperson@yhbt.net \
--cc=git@vger.kernel.org \
--cc=junkio@cox.net \
--cc=pazu@pazu.com.br \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.