git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Wong <normalperson@yhbt.net>
To: Junio C Hamano <junkio@cox.net>
Cc: git <git@vger.kernel.org>, Jan-Benedict Glaw <jbglaw@lug-owl.de>,
	Eric Wong <normalperson@yhbt.net>
Subject: [PATCH] contrib/git-svn: stabilize memory usage for big fetches
Date: Sat, 25 Mar 2006 18:52:31 -0800	[thread overview]
Message-ID: <11433415513822-git-send-email-normalperson@yhbt.net> (raw)
In-Reply-To: <20060325082521.GA17473@hand.yhbt.net>

We should be able to safely import histories with thousands
of revisions without hogging lots of memory.

With this, we lose the ability to autocorrect mistakes when
people specify revisions in reverse, but it's probably no longer
a problem since we only have one method of log parsing nowadays.

I've added an extra check to ensure that revision numbers do
increment.

Also, increment the version number to 0.11.0.  I really should
just call it 1.0 soon...

Signed-off-by: Eric Wong <normalperson@yhbt.net>

---

 contrib/git-svn/git-svn.perl |  109 ++++++++++++++++++++++++------------------
 1 files changed, 63 insertions(+), 46 deletions(-)

c76df6617116a7d330a3110230bc3b01eaf9c66d
diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index f3fc3ec..3e5733e 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -8,7 +8,7 @@ use vars qw/	$AUTHOR $VERSION
 		$GIT_SVN_INDEX $GIT_SVN
 		$GIT_DIR $REV_DIR/;
 $AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
-$VERSION = '0.10.0';
+$VERSION = '0.11.0';
 $GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git";
 # make sure the svn binary gives consistent output between locales and TZs:
 $ENV{TZ} = 'UTC';
@@ -217,9 +217,8 @@ sub fetch {
 	push @log_args, '--stop-on-copy' unless $_no_stop_copy;
 
 	my $svn_log = svn_log_raw(@log_args);
-	@$svn_log = sort { $a->{revision} <=> $b->{revision} } @$svn_log;
 
-	my $base = shift @$svn_log or croak "No base revision!\n";
+	my $base = next_log_entry($svn_log) or croak "No base revision!\n";
 	my $last_commit = undef;
 	unless (-d $SVN_WC) {
 		svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
@@ -234,18 +233,22 @@ sub fetch {
 	}
 	my @svn_up = qw(svn up);
 	push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
-	my $last_rev = $base->{revision};
-	foreach my $log_msg (@$svn_log) {
-		assert_svn_wc_clean($last_rev, $last_commit);
-		$last_rev = $log_msg->{revision};
-		sys(@svn_up,"-r$last_rev");
+	my $last = $base;
+	while (my $log_msg = next_log_entry($svn_log)) {
+		assert_svn_wc_clean($last->{revision}, $last_commit);
+		if ($last->{revision} >= $log_msg->{revision}) {
+			croak "Out of order: last >= current: ",
+				"$last->{revision} >= $log_msg->{revision}\n";
+		}
+		sys(@svn_up,"-r$log_msg->{revision}");
 		$last_commit = git_commit($log_msg, $last_commit, @parents);
+		$last = $log_msg;
 	}
-	assert_svn_wc_clean($last_rev, $last_commit);
+	assert_svn_wc_clean($last->{revision}, $last_commit);
 	unless (-e "$GIT_DIR/refs/heads/master") {
 		sys(qw(git-update-ref refs/heads/master),$last_commit);
 	}
-	return pop @$svn_log;
+	return $last;
 }
 
 sub commit {
@@ -708,49 +711,61 @@ sub svn_commit_tree {
 	return fetch("$rev_committed=$commit")->{revision};
 }
 
+# read the entire log into a temporary file (which is removed ASAP)
+# and store the file handle + parser state
 sub svn_log_raw {
 	my (@log_args) = @_;
-	my $pid = open my $log_fh,'-|';
+	my $log_fh = IO::File->new_tmpfile or croak $!;
+	my $pid = fork;
 	defined $pid or croak $!;
-
-	if ($pid == 0) {
+	if (!$pid) {
+		open STDOUT, '>&', $log_fh or croak $!;
 		exec (qw(svn log), @log_args) or croak $!
 	}
+	waitpid $pid, 0;
+	croak if $?;
+	seek $log_fh, 0, 0 or croak $!;
+	return { state => 'sep', fh => $log_fh };
+}
+
+sub next_log_entry {
+	my $log = shift; # retval of svn_log_raw()
+	my $ret = undef;
+	my $fh = $log->{fh};
 
-	my @svn_log;
-	my $state = 'sep';
-	while (<$log_fh>) {
+	while (<$fh>) {
 		chomp;
 		if (/^\-{72}$/) {
-			if ($state eq 'msg') {
-				if ($svn_log[$#svn_log]->{lines}) {
-					$svn_log[$#svn_log]->{msg} .= $_."\n";
-					unless(--$svn_log[$#svn_log]->{lines}) {
-						$state = 'sep';
+			if ($log->{state} eq 'msg') {
+				if ($ret->{lines}) {
+					$ret->{msg} .= $_."\n";
+					unless(--$ret->{lines}) {
+						$log->{state} = 'sep';
 					}
 				} else {
 					croak "Log parse error at: $_\n",
-						$svn_log[$#svn_log]->{revision},
+						$ret->{revision},
 						"\n";
 				}
 				next;
 			}
-			if ($state ne 'sep') {
+			if ($log->{state} ne 'sep') {
 				croak "Log parse error at: $_\n",
-					"state: $state\n",
-					$svn_log[$#svn_log]->{revision},
+					"state: $log->{state}\n",
+					$ret->{revision},
 					"\n";
 			}
-			$state = 'rev';
+			$log->{state} = 'rev';
 
 			# if we have an empty log message, put something there:
-			if (@svn_log) {
-				$svn_log[$#svn_log]->{msg} ||= "\n";
-				delete $svn_log[$#svn_log]->{lines};
+			if ($ret) {
+				$ret->{msg} ||= "\n";
+				delete $ret->{lines};
+				return $ret;
 			}
 			next;
 		}
-		if ($state eq 'rev' && s/^r(\d+)\s*\|\s*//) {
+		if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
 			my $rev = $1;
 			my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
 			($lines) = ($lines =~ /(\d+)/);
@@ -758,36 +773,34 @@ sub svn_log_raw {
 					/(\d{4})\-(\d\d)\-(\d\d)\s
 					 (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
 					 or croak "Failed to parse date: $date\n";
-			my %log_msg = (	revision => $rev,
+			$ret = {	revision => $rev,
 					date => "$tz $Y-$m-$d $H:$M:$S",
 					author => $author,
 					lines => $lines,
-					msg => '' );
+					msg => '' };
 			if (defined $_authors && ! defined $users{$author}) {
 				die "Author: $author not defined in ",
 						"$_authors file\n";
 			}
-			push @svn_log, \%log_msg;
-			$state = 'msg_start';
+			$log->{state} = 'msg_start';
 			next;
 		}
 		# skip the first blank line of the message:
-		if ($state eq 'msg_start' && /^$/) {
-			$state = 'msg';
-		} elsif ($state eq 'msg') {
-			if ($svn_log[$#svn_log]->{lines}) {
-				$svn_log[$#svn_log]->{msg} .= $_."\n";
-				unless (--$svn_log[$#svn_log]->{lines}) {
-					$state = 'sep';
+		if ($log->{state} eq 'msg_start' && /^$/) {
+			$log->{state} = 'msg';
+		} elsif ($log->{state} eq 'msg') {
+			if ($ret->{lines}) {
+				$ret->{msg} .= $_."\n";
+				unless (--$ret->{lines}) {
+					$log->{state} = 'sep';
 				}
 			} else {
 				croak "Log parse error at: $_\n",
-					$svn_log[$#svn_log]->{revision},"\n";
+					$ret->{revision},"\n";
 			}
 		}
 	}
-	close $log_fh or croak $?;
-	return \@svn_log;
+	return $ret;
 }
 
 sub svn_info {
@@ -1114,9 +1127,13 @@ __END__
 
 Data structures:
 
-@svn_log = array of log_msg hashes
+$svn_log hashref (as returned by svn_log_raw)
+{
+	fh => file handle of the log file,
+	state => state of the log file parser (sep/msg/rev/msg_start...)
+}
 
-$log_msg hash
+$log_msg hashref as returned by next_log_entry($svn_log)
 {
 	msg => 'whitespace-formatted log entry
 ',						# trailing newline is preserved
-- 
1.2.4.gb622a

  reply	other threads:[~2006-03-26  2:53 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-22 13:33 Errors GITtifying GCC and Binutils Jan-Benedict Glaw
2006-03-22 23:39 ` Linus Torvalds
2006-03-23  0:12   ` Linus Torvalds
2006-03-23  1:28     ` Linus Torvalds
2006-03-23 20:03       ` Jan-Benedict Glaw
2006-03-23 20:42         ` Linus Torvalds
2006-03-24  0:39         ` Chris Shoemaker
2006-03-24  6:12           ` Keith Packard
2006-03-24  7:52           ` Jan-Benedict Glaw
2006-03-25  0:37             ` Chris Shoemaker
2006-03-23  6:09     ` H. Peter Anvin
2006-03-23 15:45       ` Keith Packard
2006-03-23 16:01         ` Linus Torvalds
     [not found]           ` <20060323131200.02c535b8.seanlkml@sympatico.ca>
2006-03-23 18:12             ` sean
2006-03-23 20:38               ` Linus Torvalds
2006-03-23 20:48                 ` Shawn Pearce
2006-03-23 21:11                   ` Ryan Anderson
2006-03-24  0:15                     ` Junio C Hamano
2006-03-23 23:30                   ` Junio C Hamano
2006-03-24 15:12                     ` Johannes Schindelin
2006-03-24 11:11                   ` Mark Wooding
2006-03-24 11:29                     ` Andreas Ericsson
2006-03-23 21:31                 ` David S. Miller
2006-03-23 21:48                   ` Linus Torvalds
2006-03-23 22:36                   ` Timo Hirvonen
     [not found]                 ` <20060323170515.3612dc61.seanlkml@sympatico.ca>
2006-03-23 22:05                   ` sean
2006-03-24 12:32                 ` Ralf Baechle
2006-03-24 12:59                   ` missing git features (was: Re: Errors GITtifying GCC and Binutils) Andreas Ericsson
2006-03-24 16:44                     ` Carl Worth
2006-03-24 18:55                       ` missing git features Andreas Ericsson
2006-03-23 21:02           ` Errors GITtifying GCC and Binutils Ryan Anderson
2006-03-23 21:39             ` Linus Torvalds
2006-03-23 23:51             ` Junio C Hamano
2006-03-24  0:06               ` Ryan Anderson
2006-03-24  0:34                 ` Junio C Hamano
2006-03-24 12:44   ` Ralf Baechle
2006-03-24 18:25 ` Jan-Benedict Glaw
2006-03-24 19:10   ` Andreas Ericsson
2006-03-25 10:17     ` Jan-Benedict Glaw
2006-03-24 19:35   ` Santi Béjar
2006-03-25  8:25   ` Eric Wong
2006-03-26  2:52     ` Eric Wong [this message]
2006-03-25  9:10 ` James Cloos

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11433415513822-git-send-email-normalperson@yhbt.net \
    --to=normalperson@yhbt.net \
    --cc=git@vger.kernel.org \
    --cc=jbglaw@lug-owl.de \
    --cc=junkio@cox.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).