git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yaacov Akiba Slama <ya@slamail.org>
To: git@vger.kernel.org
Subject: [PATCH] Several fixes to import mono's svn tree
Date: Mon, 31 Oct 2005 12:09:26 +0200	[thread overview]
Message-ID: <4365ED56.3040602@slamail.org> (raw)

[-- Attachment #1: Type: text/plain, Size: 159 bytes --]

I successfully imported the mono svn tree (in fact a local mirror of it) 
which has more that 50000 revisions with the included git-svnimport.

Thanks,

--yas

[-- Attachment #2: 0001-Several-fixes-to-import-mono-s-svn-tree.txt --]
[-- Type: text/plain, Size: 9067 bytes --]

(The mono tree can be found svn://svn.myrealbox.com/source
and a web interface at http://svn.myrealbox.com/viewcvs/)

1) Fix the memory leak (using svn pools).
2) Don't output errors if the following paths appear in the logs :
   /, /branches, /tags, taking into account the opt_b and opt_t.
3) Unify the handling of "A" (add) and "R" (replace) actions.
4) Handle better the difference between directory and file using check_path
5) When a branch is using files from several branches, add them as parents of
   the commit.
6) Handle the case when the only operation in a revision is to add a tag
   - that is - to create a directory called /tags/thetag.
7) Other small fixes.

Signed-off-by: Yaacov Akiba Slama <ya@slamail.org>

---

 git-svnimport.perl |  147 ++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 101 insertions(+), 46 deletions(-)

applies-to: 8c8ace55eb085e930dcc9482f7cf66f9399374b4
850208bc0981001fb62a9af1dfa14b2fb2aed05c
diff --git a/git-svnimport.perl b/git-svnimport.perl
index 45b6a19..e97f470 100755
--- a/git-svnimport.perl
+++ b/git-svnimport.perl
@@ -112,7 +112,9 @@ sub file {
 		    DIR => File::Spec->tmpdir(), UNLINK => 1);
 
 	print "... $rev $path ...\n" if $opt_v;
-	eval { $self->{'svn'}->get_file($path,$rev,$fh); };
+	my $pool = SVN::Pool->new();
+	eval { $self->{'svn'}->get_file($path,$rev,$fh,$pool); };
+	$pool->clear;
 	if($@) {
 		return undef if $@ =~ /Attempted to get checksum/;
 		die $@;
@@ -258,10 +260,17 @@ EOM
 
 open BRANCHES,">>", "$git_dir/svn2git";
 
-sub get_file($$$) {
-	my($rev,$branch,$path) = @_;
+sub node_kind($$$) {
+	my ($branch, $path, $revision) = @_;
+	my $pool=SVN::Pool->new;
+	my $kind = $svn->{'svn'}->check_path(revert_split_path($branch,$path),$revision,$pool);
+	$pool->clear;
+	return $kind;
+}
+
+sub revert_split_path($$) {
+	my($branch,$path) = @_;
 
-	# revert split_path(), below
 	my $svnpath;
 	$path = "" if $path eq "/"; # this should not happen, but ...
 	if($branch eq "/") {
@@ -272,6 +281,14 @@ sub get_file($$$) {
 		$svnpath = "$branch_name/$branch/$path";
 	}
 
+	return $svnpath
+}
+
+sub get_file($$$) {
+	my($rev,$branch,$path) = @_;
+
+	my $svnpath = revert_split_path($branch,$path);
+
 	# now get it
 	my $name;
 	if($opt_d) {
@@ -319,28 +336,57 @@ sub split_path($$) {
 	} elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
 		$branch = $1;
 	} else {
-		print STDERR "$rev: Unrecognized path: $path\n";
+		my %no_error = (
+			"/" => 1,
+			"/$tag_name" => 1,
+			"/$branch_name" => 1
+		);
+		print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path});
 		return ()
 	}
 	$path = "/" if $path eq "";
 	return ($branch,$path);
 }
 
-sub copy_subdir($$$$$$) {
+sub branch_rev($$) {
+
+	my ($srcbranch,$uptorev) = @_;
+
+	my $bbranches = $branches{$srcbranch};
+	my @revs = reverse sort { ($a eq 'LAST' ? 0 : $a) <=> ($b eq 'LAST' ? 0 : $b) } keys %$bbranches;
+	my $therev;
+	foreach my $arev(@revs) {
+		next if  ($arev eq 'LAST');
+		if ($arev <= $uptorev) {
+			$therev = $arev;
+			last;
+		}
+	}
+	return $therev;
+}
+
+sub copy_path($$$$$$$$) {
 	# Somebody copied a whole subdirectory.
 	# We need to find the index entries from the old version which the
 	# SVN log entry points to, and add them to the new place.
 
-	my($newrev,$newbranch,$path,$oldpath,$rev,$new) = @_;
-	my($branch,$srcpath) = split_path($rev,$oldpath);
+	my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;
 
-	my $gitrev = $branches{$branch}{$rev};
+	my($srcbranch,$srcpath) = split_path($rev,$oldpath);
+	my $therev = branch_rev($srcbranch, $rev);
+	my $gitrev = $branches{$srcbranch}{$therev};
 	unless($gitrev) {
 		print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
 		return;
 	}
-	print "$newrev:$newbranch:$path: copying from $branch:$srcpath @ $rev\n" if $opt_v;
-	$srcpath =~ s#/*$#/#;
+	if ($srcbranch ne $newbranch) {
+		push(@$parents, $branches{$srcbranch}{'LAST'});
+	}
+	print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;
+	if ($node_kind eq $SVN::Node::dir) {
+			$srcpath =~ s#/*$#/#;
+	}
+	
 	open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath;
 	local $/ = "\0";
 	while(<$f>) {
@@ -348,9 +394,12 @@ sub copy_subdir($$$$$$) {
 		my($m,$p) = split(/\t/,$_,2);
 		my($mode,$type,$sha1) = split(/ /,$m);
 		next if $type ne "blob";
-		$p = substr($p,length($srcpath)-1);
-		print "... found $path$p ...\n" if $opt_v;
-		push(@$new,[$mode,$sha1,$path.$p]);
+		if ($node_kind eq $SVN::Node::dir) {
+			$p = $path . substr($p,length($srcpath)-1);
+		} else {
+			$p = $path;
+		}
+		push(@$new,[$mode,$sha1,$p]);	
 	}
 	close($f) or
 		print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
@@ -359,7 +408,7 @@ sub copy_subdir($$$$$$) {
 sub commit {
 	my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
 	my($author_name,$author_email,$dest);
-	my(@old,@new);
+	my(@old,@new,@parents);
 
 	if (not defined $author) {
 		$author_name = $author_email = "unknown";
@@ -446,6 +495,8 @@ sub commit {
 		$last_rev = $rev;
 	}
 
+	push (@parents, $rev) if defined $rev;
+
 	my $cid;
 	if($tag and not %$changed_paths) {
 		$cid = $rev;
@@ -454,39 +505,31 @@ sub commit {
 		foreach my $path(@paths) {
 			my $action = $changed_paths->{$path};
 
-			if ($action->[0] eq "A") {
-				my $f = get_file($revision,$branch,$path);
-				if($f) {
-					push(@new,$f) if $f;
-				} elsif($action->[1]) {
-					copy_subdir($revision,$branch,$path,$action->[1],$action->[2],\@new);
-				} else {
-					my $opath = $action->[3];
-					print STDERR "$revision: $branch: could not fetch '$opath'\n";
+			if ($action->[0] eq "R") {
+				# refer to a file/tree in an earlier commit
+				push(@old,$path); # remove any old stuff
+			}
+			if(($action->[0] eq "A") || ($action->[0] eq "R")) {
+				my $node_kind = node_kind($branch,$path,$revision);
+				if($action->[1]) {
+					copy_path($revision,$branch,$path,$action->[1],$action->[2],$node_kind,\@new,\@parents);
+				} elsif ($node_kind eq $SVN::Node::file) {
+					my $f = get_file($revision,$branch,$path);
+					if ($f) {
+						push(@new,$f) if $f;
+					} else {
+						my $opath = $action->[3];
+						print STDERR "$revision: $branch: could not fetch '$opath'\n";
+					}
 				}
 			} elsif ($action->[0] eq "D") {
 				push(@old,$path);
 			} elsif ($action->[0] eq "M") {
-				my $f = get_file($revision,$branch,$path);
-				push(@new,$f) if $f;
-			} elsif ($action->[0] eq "R") {
-				# refer to a file/tree in an earlier commit
-				push(@old,$path); # remove any old stuff
-
-				# ... and add any new stuff
-				my($b,$srcpath) = split_path($revision,$action->[1]);
-				$srcpath =~ s#/*$#/#;
-				open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $srcpath;
-				local $/ = "\0";
-				while(<$F>) {
-					chomp;
-					my($m,$p) = split(/\t/,$_,2);
-					my($mode,$type,$sha1) = split(/ /,$m);
-					next if $type ne "blob";
-					$p = substr($p,length($srcpath)-1);
-					push(@new,[$mode,$sha1,$path.$p]);
+				my $node_kind = node_kind($branch,$path,$revision);
+				if ($node_kind eq $SVN::Node::file) {
+					my $f = get_file($revision,$branch,$path);
+					push(@new,$f) if $f;
 				}
-				close($F);
 			} else {
 				die "$revision: unknown action '".$action->[0]."' for $path\n";
 			}
@@ -554,7 +597,6 @@ sub commit {
 			$pw->close();
 
 			my @par = ();
-			@par = ("-p",$rev) if defined $rev;
 
 			# loose detection of merges
 			# based on the commit msg
@@ -564,11 +606,17 @@ sub commit {
 					if ($mparent eq 'HEAD') { $mparent = $opt_o };
 					if ( -e "$git_dir/refs/heads/$mparent") {
 						$mparent = get_headref($mparent, $git_dir);
-						push @par, '-p', $mparent;
+						push (@parents, $mparent);
 						print OUT "Merge parent branch: $mparent\n" if $opt_v;
 					}
 				}
 			}
+			my %seen_parents = ();
+			my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
+			foreach my $bparent (@unique_parents) {
+				push @par, '-p', $bparent;
+				print OUT "Merge parent branch: $bparent\n" if $opt_v;
+			}
 
 			exec("env",
 				"GIT_AUTHOR_NAME=$author_name",
@@ -600,6 +648,10 @@ sub commit {
 		die "Error running git-commit-tree: $?\n" if $?;
 	}
 
+	if (not defined $cid) {
+		$cid = $branches{"/"}{"LAST"};
+	}
+
 	if(not defined $dest) {
 		print "... no known parent\n" if $opt_v;
 	} elsif(not $tag) {
@@ -616,6 +668,7 @@ sub commit {
 		# the tag was 'complex', i.e. did not refer to a "real" revision
 
 		$dest =~ tr/_/\./ if $opt_u;
+		$branch = $dest;
 
 		my $pid = open2($in, $out, 'git-mktag');
 		print $out ("object $cid\n".
@@ -674,7 +727,9 @@ sub commit_all {
 }
 
 while(++$current_rev <= $svn->{'maxrev'}) {
-	$svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
+	my $pool=SVN::Pool->new;
+	$svn->{'svn'}->get_log("/",$current_rev,$current_rev,1,1,1,\&_commit_all,$pool);
+	$pool->clear;
 	commit_all();
 	if($opt_l and not --$opt_l) {
 		print STDERR "Stopping, because there is a memory leak (in the SVN library).\n";
---
@@GIT_VERSION@@

                 reply	other threads:[~2005-10-31 10:10 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4365ED56.3040602@slamail.org \
    --to=ya@slamail.org \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).