From: Han-Wen Nienhuys <hanwen@xs4all.nl>
To: git@vger.kernel.org
Subject: darcs2git.py - convert darcs repository using gfi
Date: Mon, 12 Feb 2007 00:56:34 +0100 [thread overview]
Message-ID: <eqoaf7$loq$1@sea.gmane.org> (raw)
[-- Attachment #1: Type: text/plain, Size: 1081 bytes --]
The python script attached is a try at providing a sane
conversion from Darcs to GIT. It tries to map darcs conflict
resolutions onto git branch merges.
Regarding GFI, it's a breeze to work with; my compliments to its
author. My only gripe is the need to specify a branch for each commit.
Darcs uses changeset based storage. It doesn't really have branches,
but it does record divergent changes and merges of resulting
conflicts. Hence, it's not clear which refs/heads/BRANCH should be
used when creating a commit object.
I found it easiest to write each commit to a
refs/heads/darcs-tmp-COUNT
branch, use the reset command to specify at the end which commits are
tops of branches, and delete the temporary branches.
So, my feature request: please make the "commit" command always accept
a "from" command, and make the "refs" argument optional. This will
clean up my converter, and separate out two logical functions of the
gfi "commit" command: creating a commit object, and advancing the head
ref.
--
Han-Wen Nienhuys - hanwen@xs4all.nl - http://www.xs4all.nl/~hanwen
[-- Attachment #2: darcs2git.py --]
[-- Type: text/x-python, Size: 12160 bytes --]
import os
import sys
import time
import xml.dom.minidom
import re
import gdbm as dbmodule
import gzip
import optparse
################################################################
# globals
# When true, read_pipe() and system() do not echo the commands they run.
# get_cli_options() sets it to the inverse of --verbose.
silent=False
# Author e-mail address -> display name, filled from the --authors file and
# consulted by DarcsPatch.extract_author().
mail_to_name_dict = {}
# Patch number -> patch, for commits that have been exported but not yet used
# as a parent or merge source of a later commit.
pending_patches = {}
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# it is still needed.
used_tags = {}
################################################################
# utils
class PullConflict(Exception):
    """Signals that pulling a patch into the scratch repo produced a conflict."""
class CommandFailed(Exception):
    """Signals that an external command finished with a non-zero status."""
def progress(s):
    """Report the message *s* on standard error, terminated by a newline."""
    line = s + '\n'
    sys.stderr.write(line)
def get_cli_options():
    """Parse the darcs2git command line.

    Side effects: sets the module-level ``silent`` flag to the inverse of
    ``--verbose`` and, for every ``--authors FILE`` option, merges the
    EMAIL=NAME lines of FILE into ``mail_to_name_dict``.

    When no ``--destination`` is given, the target is derived from the base
    name of the darcs repository: any '.darcs' is removed and '.git' is
    appended.

    Returns:
        The ``(options, args)`` pair produced by optparse; ``args[0]`` is
        the darcs repository.

    Exits with status 2 (after printing the help text) when no repository
    argument is present.
    """
    global silent

    opt_parser = optparse.OptionParser()
    opt_parser.usage = '''darcs2git [OPTIONS] DARCS-REPO'''
    opt_parser.description = '''Convert darcs repo to git.
This tool is a one shot conversion utility for Darcs repositories. It
requires Git version that has git-fast-import. It does not support
incremental updating.
This tool will import the patches in chronological order, and only creates
merges when a resolved conflict is detected.
TODO:
- correct time zone handling
-
'''

    def update_map(option, opt, value, parser):
        # optparse callback for --authors: *value* names a text file with one
        # EMAIL=NAME entry per line.
        with open(value) as author_file:
            for line in author_file:
                entry = line.strip()
                if not entry:
                    # Blank lines (e.g. a trailing newline) carry no entry.
                    continue
                if '=' not in entry:
                    # Let optparse report the problem and exit cleanly.
                    raise optparse.OptionValueError(
                        '%s: expected EMAIL=NAME, got %r' % (value, entry))
                # Split on the first '=' only, so a name may contain '='.
                (mail, name) = entry.split('=', 1)
                mail_to_name_dict[mail] = name

    opt_parser.add_option('-a', '--authors', action='callback',
                          callback=update_map,
                          type='string',
                          nargs=1,
                          help='read a text file, containing EMAIL=NAME lines')
    opt_parser.add_option('-d', '--destination', action='store',
                          type='string',
                          default='',
                          dest='target_git_repo',
                          help='where to put the resulting Git repo.')
    opt_parser.add_option('--verbose', action='store_true',
                          dest='verbose',
                          default=False,
                          help='show commands as they are invoked')

    options, args = opt_parser.parse_args()
    if not args:
        opt_parser.print_help()
        sys.exit(2)

    silent = not options.verbose

    if not options.target_git_repo:
        repo_path = os.path.abspath(args[0])
        base = os.path.basename(repo_path).replace('.darcs', '')
        options.target_git_repo = base + '.git'
    return (options, args)
def read_pipe(cmd, ignore_errors=False):
    """Run *cmd* through ``os.popen`` and return what it wrote to stdout.

    The command is echoed via progress() unless ``silent`` is set.

    Raises:
        CommandFailed: the pipe closed with a non-zero status and
            *ignore_errors* is false.
    """
    if not silent:
        progress('pipe %s' % cmd)

    stream = os.popen(cmd)
    output = stream.read()
    failed = stream.close()
    if ignore_errors or not failed:
        return output
    raise CommandFailed("Pipe failed: %s" % cmd)
def system(c, ignore_error=0):
    """Run the shell command *c* with ``os.system``.

    The command is echoed via progress() unless ``silent`` is set.

    Raises:
        CommandFailed: the command returned a non-zero status and
            *ignore_error* is false.
    """
    if not silent:
        progress(c)

    status = os.system(c)
    if ignore_error or not status:
        return
    raise CommandFailed("Command failed: %s" % c)
def darcs_date_to_git(x):
    """Convert a darcs ``YYYYMMDDHHMMSS`` date into epoch seconds (as a string).

    The stamp is interpreted as UTC, so the result does not depend on the
    time zone of the machine running the conversion.  (``time.mktime`` would
    read the same fields as local time and shift every commit date by the
    local UTC offset.)

    Raises:
        ValueError: *x* does not match the ``%Y%m%d%H%M%S`` format.
    """
    # Imported here so the function works without touching the file's
    # top-level import list.
    import calendar

    parsed = time.strptime(x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm(parsed)
def darcs_timezone(x):
    """Return the git time zone offset for a darcs ``local_date`` string.

    The string is only checked against the expected layout; the offset is
    not derived from it yet, and "+0100" is returned for every input.

    Raises:
        ValueError: *x* does not match the expected date layout.
    """
    local_date_layout = '%a %b %d %H:%M:%S %Z %Y'
    time.strptime(x, local_date_layout)
    # todo
    offset = "+0100"
    return offset
################################################################
# darcs
class DarcsConversionRepo:
    """Scratch darcs repository in which patches are replayed one at a time.

    ``dir`` is the directory of the scratch repository; ``patches`` is the
    ordered list of patches of the source repository (each one provides
    ``number``, ``dir``, ``attributes``, ``filename()`` and ``header()``).
    """

    def __init__(self, dir, patches):
        self.dir = dir
        self.patches = patches

    def clean(self):
        """Remove the scratch repository directory."""
        system('rm -rf %s' % self.dir)

    def pull(self, patch):
        """Pull exactly *patch* (matched by hash) from its source repository."""
        params = {
            'dir': self.dir,
            'id': patch.attributes['hash'],
            'source_repo': patch.dir,
        }
        system('cd %(dir)s && darcs pull --quiet --all --match "hash %(id)s" %(source_repo)s ' % params)

    def go_from_to(self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH.  Raise
        PullConflict if conflict is detected

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'

        It might be quicker and/or more correct to wind/rewind the
        repo with pull and unpull."""
        params = {'dir': os.path.abspath(self.dir)}
        repo_dir = params['dir']

        # Always start from a freshly initialised, empty repository.
        system('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
               % params)

        if from_patch:
            patch_dir = repo_dir + '/_darcs/patches/'
            # 'with' guarantees the inventory is flushed and closed even if
            # linking a patch file or extracting its header fails.
            with open(repo_dir + '/_darcs/inventory', 'w') as inventory:
                for p in self.patches[:from_patch.number + 1]:
                    source_file = p.filename()
                    os.link(source_file,
                            patch_dir + os.path.basename(source_file))
                    inventory.write(p.header())

            progress('Go to patch %d' % from_patch.number)
            # Let darcs rebuild the pristine tree from the inventory, then
            # copy it over the working directory.
            system('cd %(dir)s && darcs repair --quiet' % params)
            system('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % params)

        try:
            self.pull(to_patch)
            resolve_output = read_pipe('cd %(dir)s && echo y|darcs resolve' % params)
        except CommandFailed:
            # A failing pull or resolve is reported as a conflict.
            raise PullConflict()

        if 'No conflicts to resolve' not in resolve_output:
            raise PullConflict()

    def has_patch(self, p):
        """Return whether the patch file of *p* exists in the scratch repo."""
        patch_file = self.dir + '/_darcs/patches/' + p.attributes['hash']
        return os.path.exists(patch_file)

    def pristine_tree(self):
        """Return the path of the pristine tree inside the scratch repo."""
        return self.dir + '/_darcs/pristine'
class DarcsPatch:
    """One darcs patch, built from a ``<patch>`` element of the changes XML.

    Attributes set by the constructor:
        xml          -- the DOM element the patch was built from
        dir          -- the darcs repository that holds the patch file
        number       -- position in the patch list (-1 until assigned)
        attributes   -- the XML attributes of the element, as a dict
        author_name, author_mail, message, date -- derived commit metadata
    """

    def __init__(self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = dict(xml.attributes.items())

        # fixme: ugh attributes vs. methods.
        self.extract_author()
        self.extract_message()
        self.extract_time()

    def filename(self):
        """Return the path of the patch file inside the source repository."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents(self):
        """Return the decompressed contents of the patch file."""
        f = gzip.open(self.filename())
        try:
            return f.read()
        finally:
            # Close explicitly instead of relying on garbage collection.
            f.close()

    def header(self):
        """Return the inventory entry of the patch.

        The entry is the name line, the author/date line and, when the
        author line does not already end the entry with ']', the comment
        lines up to and including the closing ']'.
        """
        lines = self.contents().split('\n')
        name = lines[0]
        committer = lines[1] + '\n'
        # Drop whatever follows the closing bracket on the author line.
        committer = re.sub('] {\n$', ']\n', committer)
        committer = re.sub('] *\n$', ']\n', committer)

        comment = ''
        if not committer.endswith(']\n'):
            for line in lines[2:]:
                # startswith() copes with empty lines, where indexing the
                # first character would raise IndexError.
                if line.startswith(']'):
                    comment += ']\n'
                    break
                comment += line + '\n'

        return name + '\n' + committer + comment

    def extract_author(self):
        """Set ``author_name`` and ``author_mail`` from the author attribute.

        A "Name <mail>" value is split directly.  Otherwise the value is
        taken as the address; the name comes from ``mail_to_name_dict`` or,
        failing that, from the part of the address before '@'.
        """
        mail = self.attributes['author']
        m = re.search("^(.*) <(.*)>$", mail)
        if m:
            name = m.group(1)
            mail = m.group(2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time(self):
        """Set ``date`` to "<epoch seconds> <time zone offset>"."""
        self.date = (darcs_date_to_git(self.attributes['date'])
                     + ' ' + darcs_timezone(self.attributes['local_date']))

    def name(self):
        """Return the text of the <name> child, or '(no comment)' if absent."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            # No <name> element, or an empty one.
            pass
        return patch_name

    def extract_message(self):
        """Set ``message`` to the patch name, a blank line and the comment.

        Patches whose 'inverted' attribute is 'True' get an 'UNDO: ' prefix.
        """
        patch_name = self.name()

        comment = ''
        comment_elts = self.xml.getElementsByTagName('comment')
        # An empty <comment/> element has no text node to read.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        # A missing attribute is treated like 'False'.
        if self.attributes.get('inverted') == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name(self):
        """Return the tag name for a "TAG ..." patch, or '' for other patches.

        Surrounding whitespace is removed first; remaining whitespace and
        ':' characters are replaced by '_'.
        """
        patch_name = self.name()
        if not patch_name.startswith("TAG "):
            return ''
        # Strip before substituting: afterwards there is no whitespace left
        # to strip, and padding would end up as stray underscores.
        tag = patch_name[4:].strip()
        return re.sub(r'[\s:]', '_', tag)
def get_darcs_patches(darcs_repo):
    """Return the patches of *darcs_repo* as a list of DarcsPatch objects.

    The list follows the order of ``darcs changes --xml --reverse``; each
    patch's ``number`` is set to its index in the list.
    """
    progress('reading patches.')

    changes_xml = read_pipe('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString(changes_xml)
    elements = document.documentElement.getElementsByTagName('patch')

    patches = [DarcsPatch(element, darcs_repo) for element in elements]
    for index, patch in enumerate(patches):
        patch.number = index
    return patches
################################################################
# GIT export
def export_tree(tree, gfi):
    """Write the files below *tree* to *gfi* as the content of a commit.

    Emits ``deleteall`` followed by one inline ``M 644`` entry per file,
    with paths relative to *tree*, and a final blank line.

    Args:
        tree: directory whose files are exported.
        gfi: writable stream receiving the fast-import commands.
    """
    tree = os.path.normpath(tree)
    prefix = tree + '/'

    gfi.write('deleteall\n')
    for (root, dirs, files) in os.walk(tree):
        for f in files:
            path = os.path.normpath(os.path.join(root, f))
            # Close each file as soon as it has been read.
            with open(path) as source:
                contents = source.read()

            # Remove only the leading tree prefix; a blanket replace() would
            # also eat later occurrences of the same directory sequence.
            if path.startswith(prefix):
                path = path[len(prefix):]

            gfi.write('M 644 inline %s\n' % path)
            gfi.write('data %d\n%s\n' % (len(contents), contents))
    gfi.write('\n')
def export_commit(repo, patch, last_patch, gfi):
    """Write the commit for *patch* to the fast-import stream *gfi*.

    The commit goes to the temporary branch ``refs/heads/darcstmp<N>`` and
    is marked ``:<N+1>``, N being ``patch.number``.  Its parent is the
    commit of *last_patch* (if any).  Every other pending patch that is
    present in *repo* is recorded as an additional merge parent.  The file
    content is taken from the pristine tree of *repo*.

    ``pending_patches`` is updated: patches used as parent or merge source
    are removed and *patch* itself is added.
    """
    gfi.write('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write('mark :%d\n' % (patch.number + 1))
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                          patch.author_mail,
                                          patch.date))
    gfi.write('data %d\n%s\n' % (len(patch.message), patch.message))

    if last_patch:
        gfi.write('from :%d\n' % (last_patch.number + 1))
        # The parent is no longer a branch tip.
        pending_patches.pop(last_patch.number, None)

    # Iterate over a snapshot: entries are deleted inside the loop.
    for (n, p) in list(pending_patches.items()):
        if repo.has_patch(p):
            gfi.write('merge :%d\n' % (n + 1))
            del pending_patches[n]

    pending_patches[patch.number] = patch
    export_tree(repo.pristine_tree(), gfi)
def export_pending(gfi):
    """Point ``refs/heads/master`` at the result of the conversion.

    * No pending patch: nothing is written.
    * One pending patch: master is reset to its commit.
    * Several pending patches: each gets a ``refs/heads/master<N>`` branch,
      and a 'tie together' commit on master merges them all.  The
      lowest-numbered patch supplies the committer line and the first
      parent.

    Args:
        gfi: writable stream receiving the fast-import commands.
    """
    # Sort by patch number so the output does not depend on dict ordering.
    pending = sorted(pending_patches.items())
    if not pending:
        # Nothing was exported, so there is no commit to point master at.
        return

    if len(pending) == 1:
        only_patch = pending[0][1]
        gfi.write('reset refs/heads/master\n')
        gfi.write('from :%d\n\n' % (only_patch.number + 1))
        return

    for (n, p) in pending:
        gfi.write('reset refs/heads/master%d\n' % n)
        gfi.write('from :%d\n\n' % (n + 1))

    patches = [p for (n, p) in pending]
    patch = patches[0]
    gfi.write('commit refs/heads/master\n')
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                          patch.author_mail,
                                          patch.date))
    msg = 'tie together'
    gfi.write('data %d\n%s\n' % (len(msg), msg))
    gfi.write('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write('merge :%d\n' % (p.number + 1))
    gfi.write('\n')
def export_tag(patch, gfi):
    """Write an annotated tag for *patch* to the fast-import stream *gfi*.

    The tag is named ``patch.tag_name()``, points at mark
    ``:<patch.number + 1>`` and carries the patch's author, date and
    message.
    """
    emit = gfi.write
    emit('tag %s\n' % patch.tag_name())
    emit('from :%d\n' % (patch.number + 1))
    identity = (patch.author_name, patch.author_mail, patch.date)
    emit('tagger %s <%s> %s\n' % identity)
    message = patch.message
    emit('data %d\n%s\n' % (len(message), message))
################################################################
# main.
def main():
    """Convert the darcs repository named on the command line to git.

    Creates the bare target repository, streams one commit per darcs patch
    into git-fast-import, points master at the result, then removes the
    temporary ``darcstmp*`` branches and the scratch darcs repository.

    Raises:
        CommandFailed: an external command, or git-fast-import itself,
            exited with a non-zero status.
        Exception: a patch conflicts with every candidate parent.
    """
    (options, args) = get_cli_options()
    darcs_repo = os.path.abspath(args[0])
    git_repo = os.path.abspath(options.target_git_repo)
    params = {'git_repo': git_repo}

    system('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % params)
    os.environ['GIT_DIR'] = git_repo

    # NOTE(review): dashed command name; some git installations may only
    # offer 'git fast-import' on PATH -- confirm for the target environment.
    gfi_command = 'git-fast-import'
    gfi = os.popen(gfi_command, 'w')

    patches = get_darcs_patches(darcs_repo)
    conv_repo = DarcsConversionRepo("darcs2git.tmpdarcs", patches)

    for p in patches:
        parent = p.number - 1
        last = None
        # Try the direct predecessor as parent first; on conflict walk back
        # through earlier patches until the pull succeeds.
        while 1:
            if parent >= 0:
                last = patches[parent]
            try:
                conv_repo.go_from_to(last, p)
                break
            except PullConflict:
                ## simplistic, may not be enough.
                progress('conflict, going one back')
                parent -= 1
                if parent < 0:
                    raise Exception(
                        'patch %d conflicts with every candidate parent; '
                        'cannot place it in the history' % p.number)

        progress('Export %d -> %d (total %d)' % (parent,
                                                 p.number, len(patches)))
        export_commit(conv_repo, p, last, gfi)
        if p.tag_name():
            export_tag(p, gfi)

    export_pending(gfi)
    # close() returns the exit status of the child when it is non-zero;
    # do not carry on as if the import had succeeded.
    if gfi.close():
        raise CommandFailed("Pipe failed: %s" % gfi_command)

    system('rm %(git_repo)s/refs/heads/darcstmp*' % params)
    conv_repo.clean()
# Unguarded entry point: the conversion starts as soon as this file is
# executed (an import of the file would trigger it as well).
main ()
next reply other threads:[~2007-02-11 23:56 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-02-11 23:56 Han-Wen Nienhuys [this message]
2007-02-12 1:14 ` darcs2git.py - convert darcs repository using gfi Shawn O. Pearce
2007-02-13 22:42 ` Han-Wen Nienhuys
2007-02-18 12:45 ` git-fast-export ? Han-Wen Nienhuys
2007-02-19 8:25 ` Shawn O. Pearce
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='eqoaf7$loq$1@sea.gmane.org' \
--to=hanwen@xs4all.nl \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).