From: Eric Wong <normalperson@yhbt.net>
To: Martin Langhoff <martin.langhoff@gmail.com>
Cc: git list <git@vger.kernel.org>
Subject: [PATCH 5/5] -D <depth> option to recurse into merged branches
Date: Sat, 12 Nov 2005 01:32:08 -0800 [thread overview]
Message-ID: <20051112093208.GF16218@Muzzle> (raw)
In-Reply-To: <20051112093045.GE16218@Muzzle>
-D <depth> option to recurse into merged branches
-a auto-register Arch archive if it's on mirrors.sourcecontrol.net
fix for dealing with tag revisions
remove unused module loading (no more String::ShellQuote dep)
Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
git-archimport.perl | 257 ++++++++++++++++++++++++++++-----------------------
1 files changed, 141 insertions(+), 116 deletions(-)
applies-to: d6d3e5272bc39ea086e5c1b0b39ceb5b51ade1ff
2fe160b44c5e5da1a139668767ba184b6b63f605
diff --git a/git-archimport.perl b/git-archimport.perl
index 5616d42..a0ea016 100755
--- a/git-archimport.perl
+++ b/git-archimport.perl
@@ -22,9 +22,10 @@ See man (1) git-archimport for more deta
=head1 TODO
- create tag objects instead of ref tags
- - audit shell-escaping of filenames
- hide our private tags somewhere smarter
- - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
+ - sort and apply patches by graphing ancestry relations instead of just
+ relying on dates supplied in the changeset itself.
+ tla ancestry-graph -m could be helpful here...
=head1 Devel tricks
@@ -53,15 +54,9 @@ and can contain multiple, unrelated bran
use strict;
use warnings;
use Getopt::Std;
-use File::Spec;
-use File::Temp qw(tempfile tempdir);
+use File::Temp qw(tempdir);
use File::Path qw(mkpath rmtree);
use File::Basename qw(basename dirname);
-use String::ShellQuote;
-use Time::Local;
-use IO::Socket;
-use IO::Pipe;
-use POSIX qw(strftime dup2);
use Data::Dumper qw/ Dumper /;
use IPC::Open2;
@@ -72,29 +67,35 @@ my $git_dir = $ENV{"GIT_DIR"} || ".git";
$ENV{"GIT_DIR"} = $git_dir;
my $ptag_dir = "$git_dir/archimport/tags";
-our($opt_h,$opt_v, $opt_T,
- $opt_C,$opt_t);
+our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a);
sub usage() {
print STDERR <<END;
Usage: ${\basename $0} # fetch/update GIT from Arch
- [ -h ] [ -v ] [ -T ] [ -t tempdir ]
+ [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
repository/arch-branch [ repository/arch-branch] ...
END
exit(1);
}
-getopts("Thvt:") or usage();
+getopts("Thvat:D:") or usage();
usage if $opt_h;
@ARGV >= 1 or usage();
-my @arch_roots = @ARGV;
+# $arch_branches:
+# values associated with keys:
+# =1 - Arch version / git 'branch' detected via abrowse on a limit
+# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
+my %arch_branches = map { $_ => 1 } @ARGV;
+
my $tmptree;
$ENV{'TMPDIR'} = $opt_t if $opt_t;
$tmptree = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
$opt_v && print "+ Using $tmptree to store temporary trees\n";
+my %reachable = (); # Arch repositories we can access
+my %unreachable = (); # Arch repositories we can't access :<
my @psets = (); # the collection
my %psets = (); # the collection, by name
@@ -102,114 +103,117 @@ my %rptags = (); # my rev
# to map a SHA1 to a commitid
my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
-foreach my $root (@arch_roots) {
- my ($arepo, $abranch) = split(m!/!, $root);
- open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |"
- or die "Problems with tla abrowse: $!";
-
- my %ps = (); # the current one
- my $mode = '';
- my $lastseen = '';
-
- while (<ABROWSE>) {
- chomp;
-
- # first record padded w 8 spaces
- if (s/^\s{8}\b//) {
-
- # store the record we just captured
- if (%ps) {
- my %temp = %ps; # break references
- push (@psets, \%temp);
- $psets{$temp{id}} = \%temp;
- %ps = ();
- }
-
- my ($id, $type) = split(m/\s{3}/, $_);
- $ps{id} = $id;
- $ps{repo} = $arepo;
-
- # deal with types
- if ($type =~ m/^\(simple changeset\)/) {
- $ps{type} = 's';
- } elsif ($type eq '(initial import)') {
- $ps{type} = 'i';
- } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
- $ps{type} = 't';
- $ps{tag} = $1;
- } else {
- warn "Unknown type $type";
- }
- $lastseen = 'id';
- }
-
- if (s/^\s{10}//) {
- # 10 leading spaces or more
- # indicate commit metadata
-
- # date & author
- if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
+sub do_abrowse {
+ my $stage = shift;
+ while (my ($limit, $level) = each %arch_branches) {
+ next unless $level == $stage;
+
+ open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
+ or die "Problems with tla abrowse: $!";
+
+ my %ps = (); # the current one
+ my $lastseen = '';
+
+ while (<ABROWSE>) {
+ chomp;
+
+ # first record padded w 8 spaces
+ if (s/^\s{8}\b//) {
+ my ($id, $type) = split(m/\s+/, $_, 2);
+
+ my %last_ps;
+ # store the record we just captured
+ if (%ps && !exists $psets{ $ps{id} }) {
+ %last_ps = %ps; # break references
+ push (@psets, \%last_ps);
+ $psets{ $last_ps{id} } = \%last_ps;
+ }
- my ($date, $authoremail) = split(m/\s{2,}/, $_);
- $ps{date} = $date;
- $ps{date} =~ s/\bGMT$//; # strip off trailign GMT
- if ($ps{date} =~ m/\b\w+$/) {
- warn 'Arch dates not in GMT?! - imported dates will be wrong';
+ my $branch = extract_versionname($id);
+ %ps = ( id => $id, branch => $branch );
+ if (%last_ps && ($last_ps{branch} eq $branch)) {
+ $ps{parent_id} = $last_ps{id};
+ }
+
+ $arch_branches{$branch} = 1;
+ $lastseen = 'id';
+
+ # deal with types (should work with baz or tla):
+ if ($type =~ m/\(.*changeset\)/) {
+ $ps{type} = 's';
+ } elsif ($type =~ /\(.*import\)/) {
+ $ps{type} = 'i';
+ } elsif ($type =~ m/\(tag.*\)/) {
+ $ps{type} = 't';
+ # read which revision we've tagged when we parse the log
+ #$ps{tag} = $1;
+ } else {
+ warn "Unknown type $type";
+ }
+
+ $arch_branches{$branch} = 1;
+ $lastseen = 'id';
+ } elsif (s/^\s{10}//) {
+ # 10 leading spaces or more
+ # indicate commit metadata
+
+ # date
+ if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
+ $ps{date} = $1;
+ $lastseen = 'date';
+ } elsif ($_ eq 'merges in:') {
+ $ps{merges} = [];
+ $lastseen = 'merges';
+ } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
+ my $id = $_;
+ push (@{$ps{merges}}, $id);
+
+ # aggressive branch finding:
+ if ($opt_D) {
+ my $branch = extract_versionname($id);
+ my $repo = extract_reponame($branch);
+
+ if (archive_reachable($repo) &&
+ !defined $arch_branches{$branch}) {
+ $arch_branches{$branch} = $stage + 1;
+ }
+ }
+ } else {
+ warn "more metadata after merges!?: $_\n" unless /^\s*$/;
}
-
- $authoremail =~ m/^(.+)\s(\S+)$/;
- $ps{author} = $1;
- $ps{email} = $2;
-
- $lastseen = 'date';
-
- } elsif ($lastseen eq 'date') {
- # the only hint is position
- # subject is after date
- $ps{subj} = $_;
- $lastseen = 'subj';
-
- } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
- $ps{merges} = [];
- $lastseen = 'merges';
-
- } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
- push (@{$ps{merges}}, $_);
- } else {
- warn 'more metadata after merges!?';
}
-
}
- }
- if (%ps) {
- my %temp = %ps; # break references
- push (@psets, \%temp);
- $psets{ $temp{id} } = \%temp;
- %ps = ();
- }
- close ABROWSE;
+ if (%ps && !exists $psets{ $ps{id} }) {
+ my %temp = %ps; # break references
+ if ($psets[$#psets]{branch} eq $ps{branch}) {
+ $temp{parent_id} = $psets[$#psets]{id};
+ }
+ push (@psets, \%temp);
+ $psets{ $temp{id} } = \%temp;
+ }
+
+ close ABROWSE or die "$TLA abrowse failed on $limit\n";
+ }
} # end foreach $root
+do_abrowse(1);
+my $depth = 2;
+$opt_D ||= 0;
+while ($depth <= $opt_D) {
+ do_abrowse($depth);
+ $depth++;
+}
+
## Order patches by time
+# FIXME see if we can find a more optimal way to do this by graphing
+# the ancestry data and walking it, that way we won't have to rely on
+# client-supplied dates
@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
-#print Dumper \@psets;
-
-##
-## TODO cleanup irrelevant patches
-## and put an initial import
-## or a full tag
-my $import = 0;
unless (-d $git_dir) { # initial import
- if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
- print "Starting import from $psets[0]{id}\n";
- `git-init-db`;
- die $! if $?;
- $import = 1;
- } else {
- die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
- }
+ print "Starting import from $psets[0]{id}\n";
+ system('git-init-db') == 0 or die "$! $?\n";
} else { # progressing an import
# load the rptags
opendir(DIR, $ptag_dir)
@@ -233,7 +237,6 @@ unless (-d $git_dir) { # initial import
closedir DIR;
}
-# process patchsets
# extract the Arch repository name (Arch "archive" in Arch-speak)
sub extract_reponame {
my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
@@ -266,21 +269,21 @@ sub tree_dirname {
*git_branchname = *tree_dirname;
-# process patchsets
+# process patchsets in ancestry order
foreach my $ps (@psets) {
$ps->{branch} = git_branchname($ps->{id});
#
# ensure we have a clean state
#
- if (`git diff-files`) {
+ if (`git-diff-files`) {
die "Unclean tree when about to process $ps->{id} " .
" - did we fail to commit cleanly before?";
}
die $! if $?;
#
- # skip commits already in repo
+ # skip commits already in git repo
#
if (ptag($ps->{id})) {
$opt_v && print " * Skipping already imported: $ps->{id}\n";
@@ -427,7 +430,7 @@ sub sync_to_ps {
my $tree_dir = $tmptree.'/'.tree_dirname($ps->{id});
if (-d $tree_dir) {
- if ($ps->{type} eq 't' && defined $ps->{tag}) {
+ if ($ps->{type} eq 't') {
# looks like a tag-only or (worse,) a mixed tags/changeset branch,
# can't rely on replay to work correctly on these
rmtree($tree_dir);
@@ -435,13 +438,16 @@ sub sync_to_ps {
} else {
my $tree_id = arch_tree_id($tree_dir);
if ($ps->{parent_id} eq $tree_id) {
+ # the common case (hopefully)
safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
} else {
+ # this can happen if branches cherry-pick
safe_pipe_capture($TLA,'apply-delta','-d',$tree_dir,
$tree_id, $ps->{id});
}
}
} else {
+ # new branch work
safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
}
@@ -750,4 +756,23 @@ sub arch_tree_id {
return $ret;
}
+sub archive_reachable {
+ my $archive = shift;
+ return 1 if $reachable{$archive};
+ return 0 if $unreachable{$archive};
+
+ if (system "$TLA whereis-archive $archive >/dev/null") {
+ if ($opt_a && (system($TLA,'register-archive',
+ "http://mirrors.sourcecontrol.net/$archive") == 0)) {
+ $reachable{$archive} = 1;
+ return 1;
+ }
+ print STDERR "Archive is unreachable: $archive\n";
+ $unreachable{$archive} = 1;
+ return 0;
+ } else {
+ $reachable{$archive} = 1;
+ return 1;
+ }
+}
---
0.99.9.GIT
next prev parent reply other threads:[~2005-11-12 9:32 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-12 9:23 [PATCH] archimport improvements Eric Wong
2005-11-12 9:25 ` [PATCH 1/5] remove shellquote usage for tags Eric Wong
2005-11-12 9:27 ` [PATCH 2/5] archimport: don't die on merge-base failure Eric Wong
2005-11-12 9:29 ` [PATCH 3/5] Disambiguate the term 'branch' in Arch vs git Eric Wong
2005-11-12 9:30 ` [PATCH 4/5] Overhaul of changeset application Eric Wong
2005-11-12 9:32 ` Eric Wong [this message]
2005-11-14 2:01 ` [PATCH 5/5] -D <depth> option to recurse into merged branches Eric Wong
2005-11-12 12:07 ` [PATCH 4/5] Overhaul of changeset application Martin Langhoff
2005-11-12 20:49 ` Eric Wong
2005-11-12 11:54 ` [PATCH] archimport improvements Martin Langhoff
2005-11-12 20:21 ` Eric Wong
2005-11-14 22:38 ` Martin Langhoff
2005-11-15 8:03 ` Eric Wong
2005-11-15 8:05 ` [PATCH 1/2] archimport: allow for old style branch and public tag names Eric Wong
2005-11-15 8:06 ` [PATCH 2/2] archimport: sync_to_ps() messages for tracking tla methods Eric Wong
2005-11-15 8:07 ` [PATCH 1/2] archimport: allow for old style branch and public tag names Eric Wong
2005-11-17 9:26 ` [PATCH] archimport improvements Martin Langhoff
2005-11-24 7:46 ` Eric Wong
2005-11-24 7:47 ` [PATCH 1/9] archimport: first, make sure it still compiles Eric Wong
2005-11-24 7:48 ` [PATCH 2/9] remove String::ShellQuote dependency Eric Wong
2005-11-24 7:50 ` [PATCH 3/9] fix -t tmpdir switch Eric Wong
2005-11-24 7:51 ` [PATCH 4/9] remove git wrapper dependency Eric Wong
2005-11-24 7:52 ` [PATCH 5/9] add -D <depth> and -a switch Eric Wong
2005-11-24 7:53 ` [PATCH 6/9] safer log file parsing Eric Wong
2005-11-24 7:55 ` [PATCH 7/9] Add the accurate changeset applyer Eric Wong
2005-11-24 7:56 ` [PATCH 8/9] Fix a bug I introduced in the new log parser Eric Wong
2005-11-24 7:58 ` [PATCH 9/9] fix a in new changeset applyer addition Eric Wong
2005-11-27 4:24 ` [PATCH 7/9] Add the accurate changeset applyer Martin Langhoff
2005-11-27 5:43 ` Eric Wong
2005-12-01 17:02 ` Martin Langhoff
2005-12-03 2:51 ` Eric Wong
2005-12-05 18:53 ` Martin Langhoff
2005-11-24 8:20 ` [PATCH 4/9] remove git wrapper dependency Andreas Ericsson
2005-11-24 8:35 ` Junio C Hamano
2005-11-24 8:50 ` Eric Wong
2005-11-24 18:54 ` [PATCH 1/9] archimport: first, make sure it still compiles Linus Torvalds
2005-11-26 10:51 ` Martin Langhoff
2005-11-26 20:43 ` Eric Wong
2005-11-24 9:25 ` [PATCH] archimport improvements Martin Langhoff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051112093208.GF16218@Muzzle \
--to=normalperson@yhbt.net \
--cc=git@vger.kernel.org \
--cc=martin.langhoff@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.