All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Several fixes to import mono's svn tree
@ 2005-10-31 10:09 Yaacov Akiba Slama
  0 siblings, 0 replies; only message in thread
From: Yaacov Akiba Slama @ 2005-10-31 10:09 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 159 bytes --]

I successfully imported the mono svn tree (in fact a local mirror of it),
which has more than 50000 revisions, with the included git-svnimport.

Thanks,

--yas

[-- Attachment #2: 0001-Several-fixes-to-import-mono-s-svn-tree.txt --]
[-- Type: text/plain, Size: 9067 bytes --]

(The mono tree can be found at svn://svn.myrealbox.com/source
and a web interface is at http://svn.myrealbox.com/viewcvs/)

1) Fix the memory leak (using svn pools).
2) Don't output errors if the following paths appear in the logs:
   /, /branches, /tags, taking into account opt_b and opt_t.
3) Unify the handling of "A" (add) and "R" (replace) actions.
4) Better handle the difference between directories and files using check_path.
5) When a branch is using files from several branches, add them as parents of
   the commit.
6) Handle the case when the only operation in a revision is to add a tag
   - that is - to create a directory called /tags/thetag.
7) Other small fixes.

Signed-off-by: Yaacov Akiba Slama <ya@slamail.org>

---

 git-svnimport.perl |  147 ++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 101 insertions(+), 46 deletions(-)

applies-to: 8c8ace55eb085e930dcc9482f7cf66f9399374b4
850208bc0981001fb62a9af1dfa14b2fb2aed05c
diff --git a/git-svnimport.perl b/git-svnimport.perl
index 45b6a19..e97f470 100755
--- a/git-svnimport.perl
+++ b/git-svnimport.perl
@@ -112,7 +112,9 @@ sub file {
 		    DIR => File::Spec->tmpdir(), UNLINK => 1);
 
 	print "... $rev $path ...\n" if $opt_v;
-	eval { $self->{'svn'}->get_file($path,$rev,$fh); };
+	my $pool = SVN::Pool->new();
+	eval { $self->{'svn'}->get_file($path,$rev,$fh,$pool); };
+	$pool->clear;
 	if($@) {
 		return undef if $@ =~ /Attempted to get checksum/;
 		die $@;
@@ -258,10 +260,17 @@ EOM
 
 open BRANCHES,">>", "$git_dir/svn2git";
 
-sub get_file($$$) {
-	my($rev,$branch,$path) = @_;
+sub node_kind($$$) {
+	my ($branch, $path, $revision) = @_;
+	my $pool=SVN::Pool->new;
+	my $kind = $svn->{'svn'}->check_path(revert_split_path($branch,$path),$revision,$pool);
+	$pool->clear;
+	return $kind;
+}
+
+sub revert_split_path($$) {
+	my($branch,$path) = @_;
 
-	# revert split_path(), below
 	my $svnpath;
 	$path = "" if $path eq "/"; # this should not happen, but ...
 	if($branch eq "/") {
@@ -272,6 +281,14 @@ sub get_file($$$) {
 		$svnpath = "$branch_name/$branch/$path";
 	}
 
+	return $svnpath
+}
+
+sub get_file($$$) {
+	my($rev,$branch,$path) = @_;
+
+	my $svnpath = revert_split_path($branch,$path);
+
 	# now get it
 	my $name;
 	if($opt_d) {
@@ -319,28 +336,57 @@ sub split_path($$) {
 	} elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
 		$branch = $1;
 	} else {
-		print STDERR "$rev: Unrecognized path: $path\n";
+		my %no_error = (
+			"/" => 1,
+			"/$tag_name" => 1,
+			"/$branch_name" => 1
+		);
+		print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path});
 		return ()
 	}
 	$path = "/" if $path eq "";
 	return ($branch,$path);
 }
 
-sub copy_subdir($$$$$$) {
+sub branch_rev($$) {
+
+	my ($srcbranch,$uptorev) = @_;
+
+	my $bbranches = $branches{$srcbranch};
+	my @revs = reverse sort { ($a eq 'LAST' ? 0 : $a) <=> ($b eq 'LAST' ? 0 : $b) } keys %$bbranches;
+	my $therev;
+	foreach my $arev(@revs) {
+		next if  ($arev eq 'LAST');
+		if ($arev <= $uptorev) {
+			$therev = $arev;
+			last;
+		}
+	}
+	return $therev;
+}
+
+sub copy_path($$$$$$$$) {
 	# Somebody copied a whole subdirectory.
 	# We need to find the index entries from the old version which the
 	# SVN log entry points to, and add them to the new place.
 
-	my($newrev,$newbranch,$path,$oldpath,$rev,$new) = @_;
-	my($branch,$srcpath) = split_path($rev,$oldpath);
+	my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;
 
-	my $gitrev = $branches{$branch}{$rev};
+	my($srcbranch,$srcpath) = split_path($rev,$oldpath);
+	my $therev = branch_rev($srcbranch, $rev);
+	my $gitrev = $branches{$srcbranch}{$therev};
 	unless($gitrev) {
 		print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
 		return;
 	}
-	print "$newrev:$newbranch:$path: copying from $branch:$srcpath @ $rev\n" if $opt_v;
-	$srcpath =~ s#/*$#/#;
+	if ($srcbranch ne $newbranch) {
+		push(@$parents, $branches{$srcbranch}{'LAST'});
+	}
+	print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;
+	if ($node_kind eq $SVN::Node::dir) {
+			$srcpath =~ s#/*$#/#;
+	}
+	
 	open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath;
 	local $/ = "\0";
 	while(<$f>) {
@@ -348,9 +394,12 @@ sub copy_subdir($$$$$$) {
 		my($m,$p) = split(/\t/,$_,2);
 		my($mode,$type,$sha1) = split(/ /,$m);
 		next if $type ne "blob";
-		$p = substr($p,length($srcpath)-1);
-		print "... found $path$p ...\n" if $opt_v;
-		push(@$new,[$mode,$sha1,$path.$p]);
+		if ($node_kind eq $SVN::Node::dir) {
+			$p = $path . substr($p,length($srcpath)-1);
+		} else {
+			$p = $path;
+		}
+		push(@$new,[$mode,$sha1,$p]);	
 	}
 	close($f) or
 		print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
@@ -359,7 +408,7 @@ sub copy_subdir($$$$$$) {
 sub commit {
 	my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
 	my($author_name,$author_email,$dest);
-	my(@old,@new);
+	my(@old,@new,@parents);
 
 	if (not defined $author) {
 		$author_name = $author_email = "unknown";
@@ -446,6 +495,8 @@ sub commit {
 		$last_rev = $rev;
 	}
 
+	push (@parents, $rev) if defined $rev;
+
 	my $cid;
 	if($tag and not %$changed_paths) {
 		$cid = $rev;
@@ -454,39 +505,31 @@ sub commit {
 		foreach my $path(@paths) {
 			my $action = $changed_paths->{$path};
 
-			if ($action->[0] eq "A") {
-				my $f = get_file($revision,$branch,$path);
-				if($f) {
-					push(@new,$f) if $f;
-				} elsif($action->[1]) {
-					copy_subdir($revision,$branch,$path,$action->[1],$action->[2],\@new);
-				} else {
-					my $opath = $action->[3];
-					print STDERR "$revision: $branch: could not fetch '$opath'\n";
+			if ($action->[0] eq "R") {
+				# refer to a file/tree in an earlier commit
+				push(@old,$path); # remove any old stuff
+			}
+			if(($action->[0] eq "A") || ($action->[0] eq "R")) {
+				my $node_kind = node_kind($branch,$path,$revision);
+				if($action->[1]) {
+					copy_path($revision,$branch,$path,$action->[1],$action->[2],$node_kind,\@new,\@parents);
+				} elsif ($node_kind eq $SVN::Node::file) {
+					my $f = get_file($revision,$branch,$path);
+					if ($f) {
+						push(@new,$f) if $f;
+					} else {
+						my $opath = $action->[3];
+						print STDERR "$revision: $branch: could not fetch '$opath'\n";
+					}
 				}
 			} elsif ($action->[0] eq "D") {
 				push(@old,$path);
 			} elsif ($action->[0] eq "M") {
-				my $f = get_file($revision,$branch,$path);
-				push(@new,$f) if $f;
-			} elsif ($action->[0] eq "R") {
-				# refer to a file/tree in an earlier commit
-				push(@old,$path); # remove any old stuff
-
-				# ... and add any new stuff
-				my($b,$srcpath) = split_path($revision,$action->[1]);
-				$srcpath =~ s#/*$#/#;
-				open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $srcpath;
-				local $/ = "\0";
-				while(<$F>) {
-					chomp;
-					my($m,$p) = split(/\t/,$_,2);
-					my($mode,$type,$sha1) = split(/ /,$m);
-					next if $type ne "blob";
-					$p = substr($p,length($srcpath)-1);
-					push(@new,[$mode,$sha1,$path.$p]);
+				my $node_kind = node_kind($branch,$path,$revision);
+				if ($node_kind eq $SVN::Node::file) {
+					my $f = get_file($revision,$branch,$path);
+					push(@new,$f) if $f;
 				}
-				close($F);
 			} else {
 				die "$revision: unknown action '".$action->[0]."' for $path\n";
 			}
@@ -554,7 +597,6 @@ sub commit {
 			$pw->close();
 
 			my @par = ();
-			@par = ("-p",$rev) if defined $rev;
 
 			# loose detection of merges
 			# based on the commit msg
@@ -564,11 +606,17 @@ sub commit {
 					if ($mparent eq 'HEAD') { $mparent = $opt_o };
 					if ( -e "$git_dir/refs/heads/$mparent") {
 						$mparent = get_headref($mparent, $git_dir);
-						push @par, '-p', $mparent;
+						push (@parents, $mparent);
 						print OUT "Merge parent branch: $mparent\n" if $opt_v;
 					}
 				}
 			}
+			my %seen_parents = ();
+			my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
+			foreach my $bparent (@unique_parents) {
+				push @par, '-p', $bparent;
+				print OUT "Merge parent branch: $bparent\n" if $opt_v;
+			}
 
 			exec("env",
 				"GIT_AUTHOR_NAME=$author_name",
@@ -600,6 +648,10 @@ sub commit {
 		die "Error running git-commit-tree: $?\n" if $?;
 	}
 
+	if (not defined $cid) {
+		$cid = $branches{"/"}{"LAST"};
+	}
+
 	if(not defined $dest) {
 		print "... no known parent\n" if $opt_v;
 	} elsif(not $tag) {
@@ -616,6 +668,7 @@ sub commit {
 		# the tag was 'complex', i.e. did not refer to a "real" revision
 
 		$dest =~ tr/_/\./ if $opt_u;
+		$branch = $dest;
 
 		my $pid = open2($in, $out, 'git-mktag');
 		print $out ("object $cid\n".
@@ -674,7 +727,9 @@ sub commit_all {
 }
 
 while(++$current_rev <= $svn->{'maxrev'}) {
-	$svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
+	my $pool=SVN::Pool->new;
+	$svn->{'svn'}->get_log("/",$current_rev,$current_rev,1,1,1,\&_commit_all,$pool);
+	$pool->clear;
 	commit_all();
 	if($opt_l and not --$opt_l) {
 		print STDERR "Stopping, because there is a memory leak (in the SVN library).\n";
---
@@GIT_VERSION@@

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2005-10-31 10:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-10-31 10:09 [PATCH] Several fixes to import mono's svn tree Yaacov Akiba Slama

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.