git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* arch2git import script
@ 2005-08-23 13:11 Martin Langhoff
  2005-08-23 13:13 ` Martin Langhoff
  0 siblings, 1 reply; 2+ messages in thread
From: Martin Langhoff @ 2005-08-23 13:11 UTC (permalink / raw)
  To: GIT, Catalin Marinas

First draft of an Arch import. Very limited but will import one
"branch" into one head. The basics are for it to follow branches, and
some support for marking merges (or at least what git would consider
merges).

TODO:
- proper branch switching
- binary file handling
- get the date format right!
- parse the output of cat-archive-log to know what files to add/delete
and cleanup the logmsg
- ensure all shell invocations have error handling
- allow multiple "Archives"
- learn to run repeatedly over the same git repo, importing only new commits
- track cherrypicking to identify how far branches are merged (in the
git sense)
- a few options as to how to follow (or not) the history
  - follow a full branch starting from a full import "down" with
branches opening from it.
  - follow a branch history "up"

An example invocation to import the sears-ldap-dev branch is:

  mkdir sample-import
  cd sample-import;
  git-archimport-script -t /tmp/some/dir -i \
      -A arch-eduforge@catalyst.net.nz--2004 sears-ldap--dev

the commit message needs cleanup, the dates are bogus, and we aren't
marking deletes... but it otherwise works great! ;)

cheers,


martin

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: arch2git import script
  2005-08-23 13:11 arch2git import script Martin Langhoff
@ 2005-08-23 13:13 ` Martin Langhoff
  0 siblings, 0 replies; 2+ messages in thread
From: Martin Langhoff @ 2005-08-23 13:13 UTC (permalink / raw)
  To: GIT, Catalin Marinas

[-- Attachment #1: Type: text/plain, Size: 169 bytes --]

On 8/24/05, Martin Langhoff <martin.langhoff@gmail.com> wrote:
> First draft of an Arch import. 

And now, with sample script attached, too. 

cheers,


martin

[-- Attachment #2: git-archimport-script --]
[-- Type: application/octet-stream, Size: 8524 bytes --]

#!/usr/bin/perl -w

# This tool is copyright (c) 2005, Matthias Urlichs.
# It is released under the Gnu Public License, version 2.
#
# The basic idea is to walk the output of tla abrowse, 
# fetch the changesets and apply them. 
#

# The head revision is on branch "origin" by default.
# You can change that with the '-o' option.

use strict;
use warnings;
use Getopt::Std;
use File::Spec;
use File::Temp qw(tempfile);
use File::Path qw(mkpath);
use File::Basename qw(basename dirname);
use Time::Local;
use IO::Socket;
use IO::Pipe;
use POSIX qw(strftime dup2);
use Data::Dumper qw/ Dumper /;
use IPC::Open2;

# Ignore SIGPIPE for the whole process.
$SIG{PIPE} = 'IGNORE';

# Pin the timezone to UTC in the environment.
$ENV{TZ} = 'UTC';

# Package variables that Getopt::Std::getopts() fills in, one per switch.
our (
    $opt_h, $opt_A, $opt_v, $opt_k, $opt_d,
    $opt_p, $opt_C, $opt_z, $opt_i, $opt_t,
);

# Print the command-line synopsis to STDERR and exit with status 1.
# Never returns.  The empty prototype is kept so that the bare-word
# `usage` calls elsewhere in the script keep parsing as they do today.
sub usage() {
    my $prog = basename $0;
    # -i (initial import) is accepted by getopts, so it is listed here too.
    print STDERR <<END;
Usage: $prog     # fetch/update GIT from Arch
       [ -h ] [ -v ] [ -i ] [ -A archive ]
       [ -C GIT_repository ] [ -t tempdir ] 
       [ arch-branch ]
END
    exit(1);
}

# Parse the command line: switches first, then at most one positional
# argument naming the Arch branch.
getopts("hviA:C:t:") or usage();
usage if $opt_h;

@ARGV <= 1 or usage();

# Every tla invocation in this script is run with "-A $opt_A", so a
# missing archive name would produce malformed commands; refuse early.
unless (defined $opt_A && length $opt_A) {
    print STDERR "Missing required option: -A archive\n";
    usage();
}

# Scratch area: <tempdir>/git-archimport/ (tempdir defaults to /tmp).
# Test definedness/length rather than truth so "-t 0" is honoured.
my $tmp = (defined $opt_t && length $opt_t) ? $opt_t : '/tmp';
$tmp .= '/git-archimport/';

# Target repository as given with -C, defaulting to the current directory.
my $git_tree = (defined $opt_C && length $opt_C) ? $opt_C : ".";

# Optional branch limit passed on to "tla abrowse"; empty means none given.
my $arch_branch = @ARGV == 1 ? $ARGV[0] : '';

# TODO: handle more than one repo
#
# Run "tla abrowse" and turn its indented report into a list of patchset
# records (hashrefs with id, type, tag, date, author, email, subj, merges).
# The command is started in list form so the archive and branch names are
# passed to tla verbatim instead of being re-parsed by a shell.
my @abrowse_cmd = ('tla', 'abrowse', '-f', '-A', $opt_A, '--desc', '--merges');
push @abrowse_cmd, $arch_branch if length $arch_branch;
open(my $abrowse, '-|', @abrowse_cmd)
    or die "Problems with tla abrowse: $!";

my @psets  = (); # the collection
my %ps        = (); # the current one
my $mode      = '';
my $lastseen  = ''; # which field of the current record was parsed last

while (<$abrowse>) {
    chomp;

    # first record padded w 8 spaces
    if (s/^\s{8}\b//) {

        # store the record we just captured
        if (%ps) {
            my %temp = %ps; # break references
            push (@psets, \%temp);
            %ps = ();
        }

        my ($id, $type) = split(m/\s{3}/, $_);
        # a line without a type column must not trip the matches below
        $type = '' unless defined $type;
        $ps{id} = $id;

        # deal with types
        if ($type =~ m/^\(simple changeset\)/) {
            $ps{type} = 's';
        } elsif ($type eq '(initial import)') {
            $ps{type} = 'i';
        } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
            $ps{type} = 't';
            $ps{tag}  = $1;
        } else {
            warn "Unknown type $type";
        }
        $lastseen = 'id';
    }

    if (s/^\s{10}//) {
        # 10 leading spaces or more
        # indicate commit metadata

        # date & author
        if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {

            my ($date, $authoremail) = split(m/\s{2,}/, $_);
            $ps{date}   = "+0000 $date";

            # Only trust $1/$2 when the match succeeded; otherwise they
            # would still hold captures from an earlier, unrelated match.
            if (defined $authoremail && $authoremail =~ m/^(.+)\s(\S+)$/) {
                $ps{author} = $1;
                $ps{email}  = $2;
            } else {
                warn "Cannot split author and email in '$_'";
                $ps{author} = defined $authoremail ? $authoremail : '';
                $ps{email}  = '';
            }

            $lastseen = 'date';

        } elsif ($lastseen eq 'date') {
            # the only hint is position
            # subject is after date
            $ps{subj} = $_;
            $lastseen = 'subj';

        } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
            $ps{merges} = [];
            $lastseen = 'merges';

        } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
            push (@{$ps{merges}}, $_);
        } else {
            warn 'more metadata after merges!?';
        }

    }
}
# flush the last record, which has no following header line to trigger it
if (%ps) {
    my %temp = %ps; # break references
    push (@psets, \%temp);
    %ps = ();
}

# Closing a pipe handle waits for the child and reports its exit status,
# so this is the only place a failing "tla abrowse" can be noticed.
close($abrowse)
    or die "tla abrowse failed: " . ($! ? $! : "exit status " . ($? >> 8));

## Order patches by time; records sharing a timestamp are ordered by id.
## (Each side of the comparison must read its own record's fields.)
@psets = sort {
    $a->{date} cmp $b->{date}
        or
    $a->{id}   cmp $b->{id}
} @psets;

#print Dumper \@psets;

##
## TODO cleanup irrelevant patches
##      and put an initial import
##      or a full tag

if ($opt_i) { # initial import
    # Without this guard $psets[0]{type} would autovivify an empty record.
    @psets or die "No revisions found -- nothing to import";

    my $first      = $psets[0];
    my $first_type = defined $first->{type} ? $first->{type} : '';

    # A fresh repository can only be seeded from a full tree.
    if ($first_type eq 'i' || $first_type eq 't') {
        print "Starting import from $first->{id}\n";
    } else {
        die "Need to start from an import or a tag -- cannot use $first->{id}";
    }
    `git-init-db`;
    # $? carries the child's status; $! is only meaningful if it could
    # not be started at all, so report both.
    die "git-init-db failed (status $?): $!" if $?;
}

# process
#
# Replay every patchset, oldest first: switch to (or create) its git
# branch, apply it to the working tree, write a tree, commit it and move
# the branch head to the new commit.

# Indexing $psets[0] on an empty list would autovivify a bogus record.
die "No revisions found -- nothing to process" unless @psets;

my $lastbranch = branchname($psets[0]{id}); # only good for initial import

foreach my $ps (@psets) {

    $ps->{branch} = branchname($ps->{id});
    my $headfile  = ".git/refs/heads/$ps->{branch}";
    my $type      = defined $ps->{type} ? $ps->{type} : '';

    #
    # ensure we have a clean state
    #
    if (`git diff-files`) {
        die "Unclean tree when about to process $ps->{id} " .
            " - did we fail to commit cleanly before?";
    }
    die "git diff-files failed (status $?): $!" if $?;

    #
    # create the branch if needed
    # TODO: Find the ancestor!
    #
    if (-e $headfile) {
        `git checkout    $ps->{branch}`;
    } else {
        `git checkout -b $ps->{branch}`;
    }
    die "git checkout of $ps->{branch} failed (status $?): $!" if $?;

    #
    # Apply the import/changeset/merge into the working tree
    #
    if ($type eq 'i') {
        apply_import($ps) or die $!;
    } elsif ($type eq 's') {
        apply_cset($ps);
    }

    #
    # prepare update git's index, based on what arch knows
    # about the pset, resolve parents, etc
    #

    # =for reference
    # A log entry looks like
    # Revision: moodle-org--moodle--1.3.3--patch-15
    # Archive: arch-eduforge@catalyst.net.nz--2004
    # Creator: Penny Leach <penny@catalyst.net.nz>
    # Date: Wed May 25 14:15:34 NZST 2005
    # Standard-date: 2005-05-25 02:15:34 GMT
    # New-files: lang/de/.arch-ids/block_glossary_random.php.id
    #     lang/de/.arch-ids/block_html.php.id
    # New-directories: lang/de/help/questionnaire
    #     lang/de/help/questionnaire/.arch-ids
    # Removed-files: lang/be/docs/.arch-ids/release.html.id
    #     lang/be/docs/.arch-ids/releaseold.html.id
    # Modified-files: admin/cron.php admin/delete.php
    #     admin/editor.html backup/lib.php backup/restore.php
    # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
    # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
    # Keywords:
    #
    # Updating yadda tadda tadda madda

    my ($logmessage, $tree);
    { # scope block for some vars
        my $commitlog = `tla cat-archive-log -A $opt_A $ps->{id}`;
        die "Error in cat-archive-log: $!" if $?;

        # process_commitlog will git-add/rm files
        # and generally prepare things for the commit
        # (for now the raw Arch log is used as the commit message)
        $logmessage = $commitlog;

        # add all the files, re-adds will be ignored.
        # The dots are escaped and the slash after ".git" is required so
        # that only the repository directory is filtered out; the earlier
        # pattern '^./.git' also matched names such as "./digits.txt".
        `find . -type f -print0 | grep -zv '^\\./\\.git/' | xargs -0 git-update-cache --add`;
        # warn "errors when running git-update-cache! $!";
        $tree = `git-write-tree`;
        die "cannot write tree $!" if $?;
        chomp $tree;
    } # end scope block

    #
    # Commit and clean state
    #
    # The current head of the branch, if any, becomes the parent.
    my @par;
    if (-e $headfile) {
        if (open(my $head_in, '<', $headfile)) {
            my $p = <$head_in>;
            close $head_in;
            if (defined $p) {
                chomp $p;
                push @par, '-p', $p if length $p;
            }
        } elsif ($type eq 's') {
            warn "Could not find the right head for the branch $ps->{branch}";
        }
    }

    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
    # git reads the GIT_-prefixed date variables; a bare COMMITTER_DATE
    # is ignored, which left every commit stamped with the import time.
    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};

    # List form: the tree id and parents reach git-commit-tree as separate
    # arguments without going through a shell.
    my ($commit_rh, $commit_wh);
    my $pid = open2($commit_rh, $commit_wh, 'git-commit-tree', $tree, @par);
    print {$commit_wh} $logmessage; # write
    close $commit_wh;               # EOF lets git-commit-tree finish
    my $commitid = <$commit_rh>;    # read
    close $commit_rh;
    waitpid $pid, 0;                # reap the child; sets $?

    $commitid = '' unless defined $commitid;
    chomp $commitid;
    if ($? || length $commitid != 40) {
        die "Something went wrong with the commit! (status $?) $commitid";
    }

    #
    # Update the branch
    #
    open(my $head_out, '>', $headfile)
        or die "Cannot write $headfile: $!";
    print {$head_out} $commitid;
    close($head_out)
        or die "Cannot finish writing $headfile: $!";

    # Point HEAD at the branch that was just committed to.
    unlink('.git/HEAD');
    symlink("refs/heads/$ps->{branch}", ".git/HEAD")
        or warn "Could not point .git/HEAD at refs/heads/$ps->{branch}: $!";

    print " * Committed $ps->{id}\n";

}

# Derive the branch name ("category--branch") from a revision id.
# Anything up to and including the first "/" is discarded, then the first
# two "--"-separated components are glued back together with "--".
sub branchname {
    my ($revision) = @_;

    $revision =~ s#^.+?/##;

    my ($category, $branch) = (split /--/, $revision)[0, 1];
    return join('--', $category, $branch);
}

# Populate the working tree from an Arch "initial import" revision.
#
# $ps is a patchset hashref; only $ps->{id} (the revision name) is read.
# The revision is fetched with "tla get" into a scratch directory under
# $tmp, copied into the current directory with rsync (skipping .git,
# .arch-ids and {arch}), and the scratch copy is removed again.
#
# Returns 1 on success; dies if the scratch directory cannot be created
# or any of the external commands exits non-zero.
sub apply_import {
    my $ps = shift;

    # mkpath croaks on failure, unlike the unchecked "mkdir -p" it replaces
    mkpath($tmp) unless -d $tmp;

    `tla get -s --no-pristine -A $opt_A $ps->{id} $tmp/import`;
    die "Cannot get import (status $?): $!" if $?;

    # NOTE(review): the shell glob below does not match dot-files at the
    # top level of the import -- confirm whether that is intended.
    my $rsync = "rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./";
    print "$rsync\n";
    print `$rsync`;
    die "Cannot rsync import (status $?): $!" if $?;

    `rm -fr $tmp/import`;
    die "Cannot remove tempdir (status $?): $!" if $?;

    return 1;
}

# Apply one Arch changeset to the working tree.
#
# $ps is a patchset hashref; only $ps->{id} (the revision name) is read.
# The changeset is fetched with "tla get-changeset" into a scratch
# directory under $tmp, its *.patch files are fed to patch(1), files from
# its new-files-archive are copied in with rsync, and the scratch copy is
# removed.
#
# Returns 1; dies if the changeset cannot be fetched or patching fails.
sub apply_cset {
    my $ps = shift;

    # mkpath croaks on failure, unlike the unchecked "mkdir -p" it replaces
    mkpath($tmp) unless -d $tmp;

    # get the changeset
    `tla get-changeset  -A $opt_A  $ps->{id} $tmp/changeset`;
    die "Cannot get changeset (status $?): $!" if $?;

    # apply patches
    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
        # find evaluates its expression left to right, so -name has to come
        # before -print0; the other way round every file under patches/ was
        # printed and piped into patch, not just the *.patch ones.
        `(find $tmp/changeset/patches -type f -name '*.patch' -print0 | xargs -0 --no-run-if-empty cat ) | patch -p1 --verbose --forward `;
        die "Problem applying patches! (status $?) $!" if $?;
    }

    # bring in new files
    # (exit status deliberately not checked, as before: the glob matches
    # nothing when the changeset adds no files)
    `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;

    # deleted files are hinted from the commitlog processing

    `rm -fr $tmp/changeset`;

    return 1;
}


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2005-08-23 13:13 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-23 13:11 arch2git import script Martin Langhoff
2005-08-23 13:13 ` Martin Langhoff

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).