git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin Langhoff <martin.langhoff@gmail.com>
To: GIT <git@vger.kernel.org>, Catalin Marinas <catalin.marinas@gmail.com>
Subject: Re: arch2git import script
Date: Wed, 24 Aug 2005 01:13:48 +1200	[thread overview]
Message-ID: <46a038f90508230613527c11c8@mail.gmail.com> (raw)
In-Reply-To: <46a038f90508230611337c695e@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 169 bytes --]

On 8/24/05, Martin Langhoff <martin.langhoff@gmail.com> wrote:
> First draft of an Arch import. 

And now, with sample script attached, too. 

cheers,


martin

[-- Attachment #2: git-archimport-script --]
[-- Type: application/octet-stream, Size: 8524 bytes --]

#!/usr/bin/perl -w

# This tool is copyright (c) 2005, Matthias Urlichs.
# It is released under the Gnu Public License, version 2.
#
# The basic idea is to walk the output of tla abrowse, 
# fetch the changesets and apply them. 
#

# Each Arch branch is imported onto a git branch named after the
# "category--branch" part of its revision ids; there is no '-o' option.

use strict;
use warnings;
use Getopt::Std;
use File::Spec;
use File::Temp qw(tempfile);
use File::Path qw(mkpath);
use File::Basename qw(basename dirname);
use Time::Local;
use IO::Socket;
use IO::Pipe;
use POSIX qw(strftime dup2);
use Data::Dumper qw/ Dumper /;
use IPC::Open2;

# Ignore SIGPIPE and run with the timezone set to UTC.
$SIG{PIPE} = 'IGNORE';
$ENV{TZ}   = 'UTC';

# Package variables that Getopt::Std::getopts() fills in, one per switch.
our ($opt_h, $opt_A, $opt_v, $opt_k, $opt_d,
     $opt_p, $opt_C, $opt_z, $opt_i, $opt_t);

# Print a summary of the command-line options to STDERR and terminate
# the program with exit status 1.  Takes no arguments; never returns.
sub usage() {
	my $prog = basename $0;
	print STDERR <<END;
Usage: $prog     # fetch/update GIT from Arch
       [ -h ] [ -v ] [ -i ] [ -A archive ]
       [ -C GIT_repository ] [ -t tempdir ]
       [ arch-branch ]
END
	exit(1);
}

# Parse the command line: flags -h, -v, -i; valued options -A, -C, -t;
# and at most one positional argument naming the Arch branch.
getopts("hviA:C:t:") or usage();
usage() if $opt_h;

@ARGV <= 1 or usage();

# Every tla invocation in this script passes "-A $opt_A", so running
# without an archive cannot work; fail early with the usage text.
(defined $opt_A && length $opt_A) or usage();

# Scratch directory that checkouts and changesets are fetched into.
my $tmp = $opt_t;
$tmp ||= '/tmp';
$tmp .= '/git-archimport/';

# NOTE(review): $git_tree is taken from -C but nothing in the visible
# code reads it -- confirm whether a chdir() is missing.
my $git_tree = $opt_C;
$git_tree ||= ".";

my $arch_branch = '';
if ($#ARGV == 0) {
	$arch_branch = $ARGV[0];
}

# TODO: handle more than one repo
# List-form pipe open: the arguments reach tla without passing through a
# shell, so archive and branch names are never re-interpreted.  An empty
# branch is left out rather than passed as an empty argument.
my @abrowse_cmd = ('tla', 'abrowse', '-f', '-A', $opt_A, '--desc', '--merges');
push @abrowse_cmd, $arch_branch if length $arch_branch;
open ABROWSE, '-|', @abrowse_cmd
    or die "Problems with tla abrowse: $!";

my @psets  = (); # the collection
my %ps        = (); # the current one
my $mode      = '';
my $lastseen  = '';

# Parse the abrowse listing read from ABROWSE into @psets, one hashref
# per patchset.  Keys filled in here: id, type ('s' simple changeset,
# 'i' initial import, 't' tag revision), tag (tag revisions only), date,
# author, email, subj, and merges (arrayref, only after "merges in:").
# The lines carry no field labels, so they are told apart by their
# indentation and by which field was seen last ($lastseen).
while (<ABROWSE>) {
    chomp;

    # 8 whitespace characters directly followed by the id open a record
    if (s/^\s{8}\b//) {
	
	# store the record we just captured
	if (%ps) {
	    push (@psets, { %ps }); # copy, so the stored record is not %ps
	    %ps = ();
	}

	my ($id, $type) = split(m/\s{3}/, $_);
	$ps{id} = $id;
	# a line without a type part falls through to "Unknown type"
	$type = '' unless defined $type;
	
	# deal with types
	if ($type =~ m/^\(simple changeset\)/) {
	    $ps{type} = 's';
	} elsif ($type eq '(initial import)') {
	    $ps{type} = 'i';
	} elsif ($type =~ m/^\(tag revision of (.+)\)/) {
	    $ps{type} = 't';
	    $ps{tag}  = $1;
	} else { 
	    warn "Unknown type $type";
	}
	$lastseen = 'id';
    }

    if (s/^\s{10}//) { 
        # 10 leading spaces or more 
	# indicate commit metadata

	# date & author 
	if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {

	    my ($date, $authoremail) = split(m/\s{2,}/, $_);
	    $ps{date}   = "+0000 $date";

	    # Only use $1/$2 when this match succeeded: after a failed
	    # match they still hold the captures of an earlier regex.
	    if (defined $authoremail
		&& $authoremail =~ m/^(.+)\s(\S+)$/) {
		$ps{author} = $1;
		$ps{email}  = $2;
	    } else {
		warn "Cannot parse author and email for $ps{id}";
	    }

	    $lastseen = 'date';

	} elsif ($lastseen eq 'date') {
	    # the only hint is position
	    # subject is after date
	    $ps{subj} = $_;
	    $lastseen = 'subj';

	} elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
	    $ps{merges} = [];
	    $lastseen = 'merges';

	} elsif ($lastseen eq 'merges' && s/^\s{2}//) {
	    push (@{$ps{merges}}, $_);
	} else {
	    warn 'more metadata after merges!?';
	}

    }
}
# flush the record still being built when the input ended
if (%ps) {
    push (@psets, { %ps });
    %ps = ();
}

# close() on a pipe returns false when the child exited non-zero
close ABROWSE
    or warn "tla abrowse did not finish cleanly (status $?)";

## Order patches by time; patchsets with the same date are ordered by id.
## Each side of the comparison must use its own id, otherwise the id
## never influences the result.
@psets = sort { $a->{date} cmp $b->{date} or $a->{id} cmp $b->{id} } @psets;

#print Dumper \@psets;

##
## TODO cleanup irrelevant patches
##      and put an initial import
##      or a full tag

if ($opt_i) { # initial import 
    # Nothing parsed from abrowse means there is no revision to start at.
    @psets or die "No patchsets found -- nothing to import";

    # The type is unset when the abrowse line had an unknown type.
    my $first_type = defined $psets[0]{type} ? $psets[0]{type} : '';
    if ($first_type eq 'i' || $first_type eq 't') {
	print "Starting import from $psets[0]{id}\n";
    } else {
	die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
    }
    `git-init-db`;
    # $? is non-zero when the command failed; $! is only meaningful when
    # it could not be started at all ($? == -1).
    die "git-init-db failed (status $?): $!" if $?;
}

# process
my $lastbranch = branchname($psets[0]{id}); # only good for initial import

# Replay every patchset in @psets order.  For each one: switch to (or
# create) the git branch named by branchname(), apply the import or the
# changeset to the working tree, add every file to the index, write the
# tree, and create a commit whose message is the Arch log and whose
# author/committer identity and date come from the patchset.
foreach my $ps (@psets) {

    $ps->{branch} =  branchname($ps->{id});

    #
    # ensure we have a clean state 
    # 
    if (`git diff-files`){
	die "Unclean tree when about to process $ps->{id} " .
	    " - did we fail to commit cleanly before?";
    }
    die "git diff-files failed (status $?): $!" if $?;

    # 
    # create the branch if needed
    # TODO: Find the ancestor!
    #
    if ( -e ".git/refs/heads/$ps->{branch}") {
	`git checkout    $ps->{branch}`;
    } else {
	`git checkout -b $ps->{branch}`;
    }
    die "Cannot check out branch $ps->{branch} (status $?): $!" if $?;

    #
    # Apply the import/changeset/merge into the working tree
    # 
    # The type is unset for patchsets whose abrowse type was unknown.
    # Tag revisions ('t') are not applied here; the tree is committed
    # as it stands.
    my $type = defined $ps->{type} ? $ps->{type} : '';
    if ($type eq 'i') {
	apply_import($ps) or die $!;
    } elsif ($type eq 's') {
	apply_cset($ps);
    }

    #
    # prepare update git's index, based on what arch knows
    # about the pset, resolve parents, etc
    #

    # =for reference
    # A log entry looks like 
    # Revision: moodle-org--moodle--1.3.3--patch-15
    # Archive: arch-eduforge@catalyst.net.nz--2004
    # Creator: Penny Leach <penny@catalyst.net.nz>
    # Date: Wed May 25 14:15:34 NZST 2005
    # Standard-date: 2005-05-25 02:15:34 GMT
    # New-files: lang/de/.arch-ids/block_glossary_random.php.id
    #     lang/de/.arch-ids/block_html.php.id
    # New-directories: lang/de/help/questionnaire
    #     lang/de/help/questionnaire/.arch-ids
    # Removed-files: lang/be/docs/.arch-ids/release.html.id
    #     lang/be/docs/.arch-ids/releaseold.html.id
    # Modified-files: admin/cron.php admin/delete.php
    #     admin/editor.html backup/lib.php backup/restore.php
    # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
    # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
    # Keywords:
    #
    # Updating yadda tadda tadda madda

    my ($logmessage, $tree);
    { # scope block for some vars
	my $commitlog = `tla cat-archive-log -A $opt_A $ps->{id}`; 
	die "Error in cat-archive-log: $!" if $?;
	
	# TODO: parse the log to git-add/rm files; for now the whole
	# Arch log is used verbatim as the commit message
	$logmessage = $commitlog;

	# add all the files, re-adds will be ignored.
	# -prune skips the .git directory itself and nothing else, so
	# files whose names merely resemble ".git" are still added.
	`find . -path ./.git -prune -o -type f -print0 | xargs -0 git-update-cache --add`;
	# warn "errors when running git-update-cache! $!";
	$tree = `git-write-tree`;
	die "cannot write tree $!" if $?;
	chomp $tree;
    } # end scope block

    #
    # Commit and clean state
    #
    my $ref_path = ".git/refs/heads/$ps->{branch}";

    # the current head of the branch, if any, becomes the parent
    my @par;
    if ( -e $ref_path){
	if (open my $head_fh, '<', $ref_path) {
	    my $p = <$head_fh>;
	    close $head_fh;
	    if (defined $p) {
		chomp $p;
		push @par, '-p', $p;
	    }
	} else { 
	    if ($type eq 's') {
		warn "Could not find the right head for the branch $ps->{branch}";
	    }
	}
    }
    my $par = join (' ', @par);

    # git reads the dates from GIT_AUTHOR_DATE and GIT_COMMITTER_DATE
    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};

    # feed the log message to git-commit-tree, read back the commit id
    my $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par") 
	or die $!;
    print WRITER $logmessage; # write
    close WRITER;
    my $commitid = <READER>; # read
    $commitid = '' unless defined $commitid;
    chomp $commitid;
    close READER;
    waitpid $pid,0;           # close;

    if (length $commitid != 40) {
	die "Something went wrong with the commit! $! $commitid";
    }
    #
    # Update the branch
    # 
    open my $ref_fh, '>', $ref_path
	or die "Cannot write $ref_path: $!";
    print {$ref_fh} $commitid;
    close $ref_fh
	or die "Cannot close $ref_path: $!";
    unlink ('.git/HEAD');
    symlink("refs/heads/$ps->{branch}",".git/HEAD")
	or die "Cannot point .git/HEAD at $ps->{branch}: $!";

    print " * Committed $ps->{id}\n";

}

# Derive a branch name from an Arch revision id: drop everything up to
# and including the first '/', split the rest on '--' and return the
# first two components joined by '--' again.
sub branchname {
    my ($revision) = @_;
    $revision =~ s{^.+?/}{};
    my @components = split(/--/, $revision);
    return join('--', $components[0], $components[1]);
}

# Fetch the full tree of revision $ps->{id} from the archive into the
# scratch directory with "tla get" and rsync it into the current
# directory, leaving out .git, .arch-ids and {arch}.
# Takes the patchset hashref.  Returns 1; dies when the scratch
# directory cannot be created or when tla, rsync or rm report failure.
sub apply_import {
    my $ps = shift;

    # mkpath dies on failure, unlike the unchecked `mkdir -p`
    mkpath($tmp) unless -d $tmp;

    `tla get -s --no-pristine -A $opt_A $ps->{id} $tmp/import`;
    die "Cannot get import: $!" if $?;

    # build the command once so the progress line shows what is really run
    my $rsync = "rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./";
    print "$rsync\n";
    print `$rsync`;
    die "Cannot rsync import:$!" if $?;
    
    `rm -fr $tmp/import`;
    die "Cannot remove tempdir: $!" if $?;

    return 1;
}

# Fetch the changeset of revision $ps->{id} into the scratch directory
# with "tla get-changeset", apply its *.patch files to the current
# directory with patch(1), and rsync the files from its
# new-files-archive into place.
# Takes the patchset hashref.  Dies when the scratch directory cannot be
# created, when the changeset cannot be fetched or when patching fails;
# the rsync and the final cleanup are not checked.
sub apply_cset {
    my $ps = shift;

    # mkpath dies on failure, unlike the unchecked `mkdir -p`
    mkpath($tmp) unless -d $tmp;

    # get the changeset
    `tla get-changeset  -A $opt_A  $ps->{id} $tmp/changeset`;
    die "Cannot get changeset: $!" if $?;
    
    # apply patches
    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
	# -name must come before -print0: find evaluates its expression
	# left to right, so an earlier -print0 prints every file
	`(find $tmp/changeset/patches -type f -name '*.patch' -print0 | xargs -0 --no-run-if-empty cat ) | patch -p1 --verbose --forward `;
	die "Problem applying patches! $!" if $?;
    }

    # bring in new files
    `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;

    # deleted files are hinted from the commitlog processing

    `rm -fr $tmp/changeset`;
}


      reply	other threads:[~2005-08-23 13:13 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-08-23 13:11 arch2git import script Martin Langhoff
2005-08-23 13:13 ` Martin Langhoff [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=46a038f90508230613527c11c8@mail.gmail.com \
    --to=martin.langhoff@gmail.com \
    --cc=catalin.marinas@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).