From: Eric Wong <normalperson@yhbt.net>
To: Martin Langhoff <martin.langhoff@gmail.com>
Cc: git list <git@vger.kernel.org>
Subject: [PATCH 5/5] -D <depth> option to recurse into merged branches
Date: Sat, 12 Nov 2005 01:32:08 -0800 [thread overview]
Message-ID: <20051112093208.GF16218@Muzzle> (raw)
In-Reply-To: <20051112093045.GE16218@Muzzle>
-D <depth> option to recurse into merged branches
-a auto-register Arch archive if it's on mirrors.sourcecontrol.net
fix for dealing with tag revisions
remove unused module loading (no more String::ShellQuote dep)
Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
git-archimport.perl | 257 ++++++++++++++++++++++++++++-----------------------
1 files changed, 141 insertions(+), 116 deletions(-)
applies-to: d6d3e5272bc39ea086e5c1b0b39ceb5b51ade1ff
2fe160b44c5e5da1a139668767ba184b6b63f605
diff --git a/git-archimport.perl b/git-archimport.perl
index 5616d42..a0ea016 100755
--- a/git-archimport.perl
+++ b/git-archimport.perl
@@ -22,9 +22,10 @@ See man (1) git-archimport for more deta
=head1 TODO
- create tag objects instead of ref tags
- - audit shell-escaping of filenames
- hide our private tags somewhere smarter
- - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
+ - sort and apply patches by graphing ancestry relations instead of just
+ relying on dates supplied in the changeset itself.
+ tla ancestry-graph -m could be helpful here...
=head1 Devel tricks
@@ -53,15 +54,9 @@ and can contain multiple, unrelated bran
use strict;
use warnings;
use Getopt::Std;
-use File::Spec;
-use File::Temp qw(tempfile tempdir);
+use File::Temp qw(tempdir);
use File::Path qw(mkpath rmtree);
use File::Basename qw(basename dirname);
-use String::ShellQuote;
-use Time::Local;
-use IO::Socket;
-use IO::Pipe;
-use POSIX qw(strftime dup2);
use Data::Dumper qw/ Dumper /;
use IPC::Open2;
@@ -72,29 +67,35 @@ my $git_dir = $ENV{"GIT_DIR"} || ".git";
$ENV{"GIT_DIR"} = $git_dir;
my $ptag_dir = "$git_dir/archimport/tags";
-our($opt_h,$opt_v, $opt_T,
- $opt_C,$opt_t);
+our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a);
sub usage() {
print STDERR <<END;
Usage: ${\basename $0} # fetch/update GIT from Arch
- [ -h ] [ -v ] [ -T ] [ -t tempdir ]
+ [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
repository/arch-branch [ repository/arch-branch] ...
END
exit(1);
}
-getopts("Thvt:") or usage();
+getopts("Thvat:D:") or usage();
usage if $opt_h;
@ARGV >= 1 or usage();
-my @arch_roots = @ARGV;
+# %arch_branches:
+# values associated with keys:
+# =1 - Arch version / git 'branch' detected via abrowse on a limit
+# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
+my %arch_branches = map { $_ => 1 } @ARGV;
+
my $tmptree;
$ENV{'TMPDIR'} = $opt_t if $opt_t;
$tmptree = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
$opt_v && print "+ Using $tmptree to store temporary trees\n";
+my %reachable = (); # Arch repositories we can access
+my %unreachable = (); # Arch repositories we can't access :<
my @psets = (); # the collection
my %psets = (); # the collection, by name
@@ -102,114 +103,117 @@ my %rptags = (); # my rev
# to map a SHA1 to a commitid
my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
-foreach my $root (@arch_roots) {
- my ($arepo, $abranch) = split(m!/!, $root);
- open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |"
- or die "Problems with tla abrowse: $!";
-
- my %ps = (); # the current one
- my $mode = '';
- my $lastseen = '';
-
- while (<ABROWSE>) {
- chomp;
-
- # first record padded w 8 spaces
- if (s/^\s{8}\b//) {
-
- # store the record we just captured
- if (%ps) {
- my %temp = %ps; # break references
- push (@psets, \%temp);
- $psets{$temp{id}} = \%temp;
- %ps = ();
- }
-
- my ($id, $type) = split(m/\s{3}/, $_);
- $ps{id} = $id;
- $ps{repo} = $arepo;
-
- # deal with types
- if ($type =~ m/^\(simple changeset\)/) {
- $ps{type} = 's';
- } elsif ($type eq '(initial import)') {
- $ps{type} = 'i';
- } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
- $ps{type} = 't';
- $ps{tag} = $1;
- } else {
- warn "Unknown type $type";
- }
- $lastseen = 'id';
- }
-
- if (s/^\s{10}//) {
- # 10 leading spaces or more
- # indicate commit metadata
-
- # date & author
- if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
+sub do_abrowse {
+ my $stage = shift;
+ while (my ($limit, $level) = each %arch_branches) {
+ next unless $level == $stage;
+
+ open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
+ or die "Problems with tla abrowse: $!";
+
+ my %ps = (); # the current one
+ my $lastseen = '';
+
+ while (<ABROWSE>) {
+ chomp;
+
+ # first record padded w 8 spaces
+ if (s/^\s{8}\b//) {
+ my ($id, $type) = split(m/\s+/, $_, 2);
+
+ my %last_ps;
+ # store the record we just captured
+ if (%ps && !exists $psets{ $ps{id} }) {
+ %last_ps = %ps; # break references
+ push (@psets, \%last_ps);
+ $psets{ $last_ps{id} } = \%last_ps;
+ }
- my ($date, $authoremail) = split(m/\s{2,}/, $_);
- $ps{date} = $date;
- $ps{date} =~ s/\bGMT$//; # strip off trailign GMT
- if ($ps{date} =~ m/\b\w+$/) {
- warn 'Arch dates not in GMT?! - imported dates will be wrong';
+ my $branch = extract_versionname($id);
+ %ps = ( id => $id, branch => $branch );
+ if (%last_ps && ($last_ps{branch} eq $branch)) {
+ $ps{parent_id} = $last_ps{id};
+ }
+
+ $arch_branches{$branch} = 1;
+ $lastseen = 'id';
+
+ # deal with types (should work with baz or tla):
+ if ($type =~ m/\(.*changeset\)/) {
+ $ps{type} = 's';
+ } elsif ($type =~ /\(.*import\)/) {
+ $ps{type} = 'i';
+ } elsif ($type =~ m/\(tag.*\)/) {
+ $ps{type} = 't';
+ # read which revision we've tagged when we parse the log
+ #$ps{tag} = $1;
+ } else {
+ warn "Unknown type $type";
+ }
+
+ $arch_branches{$branch} = 1;
+ $lastseen = 'id';
+ } elsif (s/^\s{10}//) {
+ # 10 leading spaces or more
+ # indicate commit metadata
+
+ # date
+ if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
+ $ps{date} = $1;
+ $lastseen = 'date';
+ } elsif ($_ eq 'merges in:') {
+ $ps{merges} = [];
+ $lastseen = 'merges';
+ } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
+ my $id = $_;
+ push (@{$ps{merges}}, $id);
+
+ # aggressive branch finding:
+ if ($opt_D) {
+ my $branch = extract_versionname($id);
+ my $repo = extract_reponame($branch);
+
+ if (archive_reachable($repo) &&
+ !defined $arch_branches{$branch}) {
+ $arch_branches{$branch} = $stage + 1;
+ }
+ }
+ } else {
+ warn "more metadata after merges!?: $_\n" unless /^\s*$/;
}
-
- $authoremail =~ m/^(.+)\s(\S+)$/;
- $ps{author} = $1;
- $ps{email} = $2;
-
- $lastseen = 'date';
-
- } elsif ($lastseen eq 'date') {
- # the only hint is position
- # subject is after date
- $ps{subj} = $_;
- $lastseen = 'subj';
-
- } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
- $ps{merges} = [];
- $lastseen = 'merges';
-
- } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
- push (@{$ps{merges}}, $_);
- } else {
- warn 'more metadata after merges!?';
}
-
}
- }
- if (%ps) {
- my %temp = %ps; # break references
- push (@psets, \%temp);
- $psets{ $temp{id} } = \%temp;
- %ps = ();
- }
- close ABROWSE;
+ if (%ps && !exists $psets{ $ps{id} }) {
+ my %temp = %ps; # break references
+ if ($psets[$#psets]{branch} eq $ps{branch}) {
+ $temp{parent_id} = $psets[$#psets]{id};
+ }
+ push (@psets, \%temp);
+ $psets{ $temp{id} } = \%temp;
+ }
+
+ close ABROWSE or die "$TLA abrowse failed on $limit\n";
+ }
} # end foreach $root
+do_abrowse(1);
+my $depth = 2;
+$opt_D ||= 0;
+while ($depth <= $opt_D) {
+ do_abrowse($depth);
+ $depth++;
+}
+
## Order patches by time
+# FIXME see if we can find a more optimal way to do this by graphing
+# the ancestry data and walking it, that way we won't have to rely on
+# client-supplied dates
@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
-#print Dumper \@psets;
-
-##
-## TODO cleanup irrelevant patches
-## and put an initial import
-## or a full tag
-my $import = 0;
unless (-d $git_dir) { # initial import
- if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
- print "Starting import from $psets[0]{id}\n";
- `git-init-db`;
- die $! if $?;
- $import = 1;
- } else {
- die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
- }
+ print "Starting import from $psets[0]{id}\n";
+ system('git-init-db') == 0 or die "$! $?\n";
} else { # progressing an import
# load the rptags
opendir(DIR, $ptag_dir)
@@ -233,7 +237,6 @@ unless (-d $git_dir) { # initial import
closedir DIR;
}
-# process patchsets
# extract the Arch repository name (Arch "archive" in Arch-speak)
sub extract_reponame {
my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
@@ -266,21 +269,21 @@ sub tree_dirname {
*git_branchname = *tree_dirname;
-# process patchsets
+# process patchsets in ancestry order
foreach my $ps (@psets) {
$ps->{branch} = git_branchname($ps->{id});
#
# ensure we have a clean state
#
- if (`git diff-files`) {
+ if (`git-diff-files`) {
die "Unclean tree when about to process $ps->{id} " .
" - did we fail to commit cleanly before?";
}
die $! if $?;
#
- # skip commits already in repo
+ # skip commits already in git repo
#
if (ptag($ps->{id})) {
$opt_v && print " * Skipping already imported: $ps->{id}\n";
@@ -427,7 +430,7 @@ sub sync_to_ps {
my $tree_dir = $tmptree.'/'.tree_dirname($ps->{id});
if (-d $tree_dir) {
- if ($ps->{type} eq 't' && defined $ps->{tag}) {
+ if ($ps->{type} eq 't') {
# looks like a tag-only or (worse,) a mixed tags/changeset branch,
# can't rely on replay to work correctly on these
rmtree($tree_dir);
@@ -435,13 +438,16 @@ sub sync_to_ps {
} else {
my $tree_id = arch_tree_id($tree_dir);
if ($ps->{parent_id} eq $tree_id) {
+ # the common case (hopefully)
safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
} else {
+ # this can happen if branches cherry-pick
safe_pipe_capture($TLA,'apply-delta','-d',$tree_dir,
$tree_id, $ps->{id});
}
}
} else {
+ # new branch work
safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
}
@@ -750,4 +756,23 @@ sub arch_tree_id {
return $ret;
}
+sub archive_reachable {
+ my $archive = shift;
+ return 1 if $reachable{$archive};
+ return 0 if $unreachable{$archive};
+
+ if (system "$TLA whereis-archive $archive >/dev/null") {
+ if ($opt_a && (system($TLA,'register-archive',
+ "http://mirrors.sourcecontrol.net/$archive") == 0)) {
+ $reachable{$archive} = 1;
+ return 1;
+ }
+ print STDERR "Archive is unreachable: $archive\n";
+ $unreachable{$archive} = 1;
+ return 0;
+ } else {
+ $reachable{$archive} = 1;
+ return 1;
+ }
+}
---
0.99.9.GIT
next prev parent reply other threads:[~2005-11-12 9:32 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-12 9:23 [PATCH] archimport improvements Eric Wong
2005-11-12 9:25 ` [PATCH 1/5] remove shellquote usage for tags Eric Wong
2005-11-12 9:27 ` [PATCH 2/5] archimport: don't die on merge-base failure Eric Wong
2005-11-12 9:29 ` [PATCH 3/5] Disambiguate the term 'branch' in Arch vs git Eric Wong
2005-11-12 9:30 ` [PATCH 4/5] Overhaul of changeset application Eric Wong
2005-11-12 9:32 ` Eric Wong [this message]
2005-11-14 2:01 ` [PATCH 5/5] -D <depth> option to recurse into merged branches Eric Wong
2005-11-12 12:07 ` [PATCH 4/5] Overhaul of changeset application Martin Langhoff
2005-11-12 20:49 ` Eric Wong
2005-11-12 11:54 ` [PATCH] archimport improvements Martin Langhoff
2005-11-12 20:21 ` Eric Wong
2005-11-14 22:38 ` Martin Langhoff
2005-11-15 8:03 ` Eric Wong
2005-11-15 8:05 ` [PATCH 1/2] archimport: allow for old style branch and public tag names Eric Wong
2005-11-15 8:06 ` [PATCH 2/2] archimport: sync_to_ps() messages for tracking tla methods Eric Wong
2005-11-15 8:07 ` [PATCH 1/2] archimport: allow for old style branch and public tag names Eric Wong
2005-11-17 9:26 ` [PATCH] archimport improvements Martin Langhoff
2005-11-24 7:46 ` Eric Wong
2005-11-24 7:47 ` [PATCH 1/9] archimport: first, make sure it still compiles Eric Wong
2005-11-24 7:48 ` [PATCH 2/9] remove String::ShellQuote dependency Eric Wong
2005-11-24 7:50 ` [PATCH 3/9] fix -t tmpdir switch Eric Wong
2005-11-24 7:51 ` [PATCH 4/9] remove git wrapper dependency Eric Wong
2005-11-24 7:52 ` [PATCH 5/9] add -D <depth> and -a switch Eric Wong
2005-11-24 7:53 ` [PATCH 6/9] safer log file parsing Eric Wong
2005-11-24 7:55 ` [PATCH 7/9] Add the accurate changeset applyer Eric Wong
2005-11-24 7:56 ` [PATCH 8/9] Fix a bug I introduced in the new log parser Eric Wong
2005-11-24 7:58 ` [PATCH 9/9] fix a in new changeset applyer addition Eric Wong
2005-11-27 4:24 ` [PATCH 7/9] Add the accurate changeset applyer Martin Langhoff
2005-11-27 5:43 ` Eric Wong
2005-12-01 17:02 ` Martin Langhoff
2005-12-03 2:51 ` Eric Wong
2005-12-05 18:53 ` Martin Langhoff
2005-11-24 8:20 ` [PATCH 4/9] remove git wrapper dependency Andreas Ericsson
2005-11-24 8:35 ` Junio C Hamano
2005-11-24 8:50 ` Eric Wong
2005-11-24 18:54 ` [PATCH 1/9] archimport: first, make sure it still compiles Linus Torvalds
2005-11-26 10:51 ` Martin Langhoff
2005-11-26 20:43 ` Eric Wong
2005-11-24 9:25 ` [PATCH] archimport improvements Martin Langhoff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051112093208.GF16218@Muzzle \
--to=normalperson@yhbt.net \
--cc=git@vger.kernel.org \
--cc=martin.langhoff@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).