#!/usr/bin/env python
USAGE = r'''
git-p4c - written by John Chapman.

License:
    You are free to use this under the terms of the GPL License
    http://www.fsf.org/licensing/licenses/gpl.html

    I may change the license at any date in the future, unless I have
    substantial contributions, but regardless of what licence I choose,
    it will be an open source license.

    Probably it will become whatever license Git itself is under,
    just to make my life easier.

Example:
    ~/git-p4c/git-p4c \
        --server=localhost:1666 \
        --root=//depot \
        --repo=/tmp/playground \
        --user=arafangion \
        --pass= \
        --p4=/home/arafangion/perforce/p4 \
        --max-changes=2 \
        --branches='
            trunk=//depot/(trunk)/(.*)
            branches=//depot/branches/(.*?)/(.*)
            '
'''

import datetime
import fcntl
import marshal
import os
import re
import subprocess
import sys
import time

# ``sre`` has only been a deprecated alias of ``re`` since Python 2.5 and no
# longer exists on Python 3.  Keep the old name bound so that code in this
# file that still says ``sre.match`` keeps working.
sre = re


def main():
    '''Import Perforce changesets into a git repository.

    Settings are read from the ``git-p4c.*`` entries of the git
    configuration and may be overridden with ``--name=value`` command line
    arguments (see USAGE).  Each changeset newer than both ``--initial`` and
    the latest changeset already recorded in the repository is fed to
    ``git fast-import``; at most ``--max-changes`` changesets are processed
    per run.

    ``-h`` / ``--help`` prints USAGE and returns without touching anything.
    Exits with status 2 when a required setting is missing and with status 1
    when ``git fast-import`` reports a failure.
    '''
    opts = (
        '--server',
        '--user',
        '--pass',
        '--allow-case-changes',
        '--root',
        '--p4',
        '--branches',
        '--repo',
        '--initial',
        '--max-changes')

    args = sys.argv[1:]
    if '-h' in args or '--help' in args:
        sys.stdout.write(USAGE)
        return

    config = git_config()
    # Now, override configuration if specified.  Only the exact
    # ``--name=value`` form is accepted: a prefix match would confuse
    # look-alike options and a missing '=' used to raise IndexError.
    for arg in args:
        for opt in opts:
            if arg.startswith(opt + '='):
                config[opt[2:]] = arg[len(opt) + 1:]
            elif arg == opt:
                sys.stderr.write(
                    'Ignoring %s: expected %s=VALUE\n' % (arg, opt))
    config = git_config(config)

    # These are read unconditionally further down; report them in one go
    # instead of failing later with a bare KeyError.
    missing = [key for key in ('p4', 'server', 'user', 'pass', 'root')
               if key not in config]
    if missing:
        sys.stderr.write(USAGE)
        sys.stderr.write(
            '\nMissing required option(s): %s\n'
            % ', '.join(['--' + key for key in missing]))
        sys.exit(2)

    P4C = p4c_Connection(config)
    GIT = git_Connection(config)
    start = max(int(config['initial']) - 1, GIT.latest())
    limit = int(config['max-changes'])

    out = sys.stdout
    out.write('Downloading Changesets...\n')
    done = 0
    began = time.time()
    for cs in P4C.changesets(start):
        if done != 0:
            out.write('Processing: %s Avg:  %s  at %s' % (
                cs.number,
                (time.time() - began) / float(done),
                datetime.datetime.today().ctime()))
        else:
            out.write('Processing: %s' % (cs.number,))
        out.flush()
        g = GIT.commit(cs)
        if g is not None:
            for p4file in cs.files():
                if p4file.is_interesting():
                    out.write('.')
                    out.flush()
                    g.add(p4file)
            out.write('\n')
            g.commit()
        done += 1
        if done >= limit:
            break

    # git fast-import only finalises the pack and updates the refs once its
    # input is closed.  Close it and wait, so that "complete" really means
    # complete and a failed import does not go unnoticed.
    fast_import = getattr(GIT, '_fast_import', None)
    if fast_import is not None:
        errors = fast_import.communicate()[1]
        if fast_import.returncode != 0:
            sys.stderr.write(
                'git fast-import failed with status %s\n'
                % (fast_import.returncode,))
            if errors:
                sys.stderr.write('%s\n' % (errors,))
            sys.exit(1)
    out.write('Fetch Complete!\n')
# Perforce actions that put file content into a commit / remove a path.
_ADD_ACTIONS = ('add', 'edit', 'integrate', 'branch', 'move/add')
_DELETE_ACTIONS = ('delete', 'purge', 'move/delete')


def _git_lines(*args):
    '''Run ``git <args>`` and return its standard output as a list of lines.

    ``communicate()`` drains the pipe while the child is running, so a large
    output cannot fill the pipe buffer and dead-lock the way ``wait()``
    followed by ``readlines()`` can.
    '''
    proc = subprocess.Popen(('git',) + args, stdout=subprocess.PIPE)
    return proc.communicate()[0].splitlines()


def _local_branches():
    '''Return the branch names listed by ``git branch``.

    The checked-out branch is printed with a leading ``* `` marker and a
    detached HEAD is shown as a parenthesised pseudo entry; neither is part
    of a branch name.
    '''
    names = []
    for line in _git_lines('branch'):
        line = line.strip()
        if line.startswith('* '):
            line = line[2:].strip()
        if line and not line.startswith('('):
            names.append(line)
    return names


def _reap(proc):
    '''Close every pipe of the child process *proc* and wait for it.'''
    for stream in (proc.stdout, proc.stderr, proc.stdin):
        if stream is not None:
            try:
                stream.close()
            except (IOError, OSError):
                pass
    proc.wait()


def _latest_heads(config, tag_names):
    '''Map each branch to ``[tag name, number]`` of its highest numbered tag.

    Tags that are not of the ``<branch>/<number>`` form produced by
    format_tag() are ignored.
    '''
    heads = {}
    for name in tag_names:
        name = name.strip()
        if not name:
            continue
        try:
            branch, number = unformat_tag(config, name)
        except ValueError:
            continue
        if branch not in heads or heads[branch][1] < number:
            # Keep name and number together so that the ref resolved later
            # really is the newest one.
            heads[branch] = [name, number]
    return heads


def git_config(conf=None):
    '''Store and/or load the ``git-p4c.*`` settings of the git repository.

    When *conf* is given, the repository named by ``conf['repo']`` (if any)
    is created, entered with ``os.chdir`` and initialised with ``git init``,
    and every entry of *conf* is written with ``git config`` as
    ``git-p4c.<key>``.  Multi-line strings and lists are written one entry
    per line as ``git-p4c.<key>-<n>``.

    In every case the settings are then read back with ``git config -l`` and
    returned as a dict of strings, except ``'branches'`` which is the list
    of all ``git-p4c.branches*`` values.  ``'initial'`` and ``'max-changes'``
    get defaults when absent.

    NOTE: this stores every setting, including the Perforce password, as
    plain text in the git configuration.
    '''
    if conf is not None:
        if 'repo' in conf:
            repo = conf['repo']
            try:
                os.mkdir(repo)
            except OSError:
                # Only "it is already there" is expected here.
                if not os.path.isdir(repo):
                    raise
            os.chdir(repo)
            subprocess.Popen(('git', 'init')).wait()
        for key in conf:
            value = conf[key]
            if isinstance(value, (list, tuple)):
                # 'branches' comes back from a previous load as a list.
                entries = value
            elif '\n' in value:
                entries = value.split('\n')
            else:
                subprocess.Popen(
                    ('git', 'config', 'git-p4c.' + key, value)).wait()
                continue
            count = 1
            for entry in entries:
                entry = entry.strip()
                if entry == '':
                    continue
                subprocess.Popen(
                    ('git', 'config',
                     'git-p4c.' + key + '-' + str(count), entry)).wait()
                count += 1

    loaded = {}
    for line in _git_lines('config', '-l'):
        line = line.strip()
        if not line.startswith('git-p4c.') or '=' not in line:
            continue
        key, value = line.split('=', 1)
        key = key[len('git-p4c.'):]
        if key.split('-')[0] == 'branches':
            branches = loaded.setdefault('branches', [])
            # The same mapping may be stored under more than one key.
            if value not in branches:
                branches.append(value)
        else:
            loaded[key] = value

    # Default Values:
    loaded.setdefault('initial', '0')
    loaded.setdefault('max-changes', '999999999')
    return loaded


class git_Connection:
    '''A running ``git fast-import`` process plus the import bookkeeping.

    Progress is remembered through tags named ``<branch>/<changeset>``; on
    start-up the existing tags are scanned to find, per branch, the latest
    imported changeset and the object it points at.
    '''

    def __init__(self, config):
        self._latest_mark = 1
        self._latest_changeset = 0
        self.config = config
        self._tags = {}
        cmd = ('git', 'fast-import')
        self._fast_import = subprocess.Popen(
            cmd, bufsize=0,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        self._prev_p4changeset = None

        # Now, determine the current heads:
        heads = _latest_heads(config, _git_lines('tag'))
        latest = 0
        for branch in heads:
            name, number = heads[branch]
            if latest < number:
                latest = number
            # Ask git rather than reading .git/refs/tags/<name> directly:
            # the loose file disappears once refs have been packed.
            resolved = _git_lines(
                'rev-parse', '--verify', 'refs/tags/' + name)
            if not resolved:
                raise RuntimeError('Cannot resolve tag %r' % (name,))
            self._record_tag(
                format_tag(self.config, branch, number),
                resolved[0].strip())
        self._latest_changeset = latest
        self._heads = heads

    def _record_tag(self, tag, committish):
        '''Remember what *tag* points at (a SHA-1 or a ``:mark``).'''
        self._tags[tag] = committish

    def tag_sha1(self, tag):
        '''Return the committish recorded for *tag*; KeyError if unknown.'''
        return self._tags[tag]

    def heads(self):
        '''Return the mutable ``{branch: [tag name, changeset number]}`` map.'''
        return self._heads

    def latest(self):
        'Returns the latest perforce changeset'
        return self._latest_changeset

    def commit(self, p4changeset):
        '''Return a git_Commit collecting the files of *p4changeset*.'''
        return git_Commit(self, p4changeset)

    def next_mark(self):
        'TODO: Ensure that the latest mark in the marks file is used as the starting point.'
        self._latest_mark += 1
        return self._latest_mark


class git_Commit:
    '''Translates one Perforce changeset into fast-import commands.

    Files are added one by one with add(), which streams their content as
    blobs; commit() then writes one commit and one tag per affected branch.
    '''

    def __init__(self, connection, commit):
        self._con = connection
        self._commit = commit
        self._files = {}

    def add(self, p4file):
        '''Register *p4file* and, unless it is a deletion, send its blob.

        *p4file* is expected to be a file for which ``is_interesting()`` is
        true.  Its pipes are closed before returning.

        NOTE: the blob length announced to fast-import is the size reported
        by ``p4 sizes``; the stream is only valid if ``p4 print`` delivers
        exactly that many bytes.
        '''
        import errno

        self._files.setdefault(p4file.branch(), []).append(p4file)
        if p4file.action not in _DELETE_ACTIONS:
            p4file.mark = self._con.next_mark()
            self._write('blob\nmark :%(mark)d\ndata %(size)d\n' % {
                'mark': p4file.mark,
                'size': p4file.size})
            while True:
                try:
                    data = p4file.read(1024)
                except (IOError, OSError):
                    # The pipe is non-blocking: "nothing available yet" is
                    # the only error worth retrying.  Anything else, and
                    # any failure while writing, must propagate instead of
                    # spinning forever.
                    error = sys.exc_info()[1]
                    if error.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
                        raise
                    time.sleep(0.1)
                    continue
                if data is None:
                    # Raw non-blocking streams report "no data yet" as None.
                    time.sleep(0.1)
                    continue
                if not data:
                    break
                self._write(data)
        p4file.close_files()

    def _write(self, s):
        '''Send *s* to the fast-import process.'''
        self._con._fast_import.stdin.write(s)

    def commit(self):
        '''Write a commit and a ``<branch>/<changeset>`` tag per branch.'''
        for branch in list(self._files.keys()):
            # Every commit needs its own mark: reusing one mark for several
            # branches makes the earlier tags point at the last commit.
            mark = self._con.next_mark()
            self._mark = mark

            if branch in self._con.heads():
                from_tag = format_tag(
                    self._con.config, branch, self._con.heads()[branch][1])
            else:
                from_tag = None
            self._con.heads()[branch] = [
                format_tag(self._con.config, branch, self._commit.number),
                self._commit.number]

            from_branch = self._files[branch][0].orig_branch()
            message = self._commit.commit_msg()
            self._write(
                'commit %(ref)s\n'
                'mark :%(mark)d\n'
                'committer %(name)s <%(email)s> %(when)d +0000\n'
                'data %(length)d\n'
                '%(message)s\n' % {
                    'ref': 'refs/heads/' + branch,
                    'mark': mark,
                    'name': self._commit.author(),
                    'email': self._commit.email(),
                    'when': self._commit.time(),
                    'length': len(message),
                    'message': message})
            if branch != from_branch:
                # First commit of a new branch: start it from its origin.
                self._write(
                    'from %(from)s\n' % {'from': 'refs/heads/' + from_branch})
            elif from_tag is not None:
                self._write(
                    'from %(from)s\n' % {'from': self._con.tag_sha1(from_tag)})

            for p4file in self._files[branch]:
                if p4file.action in _ADD_ACTIONS:
                    self._c_add(p4file)
                elif p4file.action in _DELETE_ACTIONS:
                    self._c_delete(p4file)
                else:
                    sys.stdout.write('Unhandled action: %s\n' % (p4file,))

            tagname = format_tag(
                self._con.config, branch, self._commit.number)
            self._write(
                'tag %(tagname)s\n'
                'from %(committish)s\n'
                'tagger %(name)s <%(email)s> %(when)d +0000\n'
                'data 0\n' % {
                    'tagname': tagname,
                    'committish': ':' + str(mark),
                    'name': self._commit.author(),
                    'email': self._commit.email(),
                    'when': self._commit.time()})
            self._con._record_tag(tagname, ':' + str(mark))

    def _c_add(self, file):
        '''Write the filemodify command for an already sent blob.'''
        self._write(
            'M 100644 :%(mark)d %(path)s\n' % {
                'path': file.name(),
                'mark': file.mark})

    def _c_delete(self, file):
        '''Write the filedelete command for *file*.'''
        self._write('D %(path)s\n' % {'path': file.name()})


class p4c_Connection:
    '''Runs the ``p4`` command line client and decodes its ``-G`` output.

    *details* is the configuration dict; ``'p4'`` (path of the executable)
    and ``'server'`` are read here, ``'user'``, ``'pass'`` and ``'root'``
    when commands are run.
    '''

    def __init__(self, details):
        self._p4_exe = details['p4']
        self._p4port = details['server']
        self.config = details
        self._users = None

    def _p4(self, args):
        '''Start ``p4 <args>`` with all three standard streams piped.'''
        return subprocess.Popen(
            (self._p4_exe,) + args,
            bufsize=0,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env={'P4PORT': self._p4port,
                 'P4PASSWD': self.config['pass'],
                 'P4USER': self.config['user']},
            close_fds=True)

    def _p4_records(self, args, limit=None):
        '''Run ``p4 <args>`` and return the marshalled records it prints.

        Reading stops at end of output or after *limit* records.  The child
        process is always closed and waited for, so no zombies or open
        pipes are left behind.
        '''
        proc = self._p4(args)
        records = []
        try:
            try:
                while limit is None or len(records) < limit:
                    records.append(marshal.load(proc.stdout))
            except EOFError:
                pass
        finally:
            _reap(proc)
        return records

    def user(self, username):
        '''Return ``{'email': ..., 'name': ...}`` for *username*.

        The user list is fetched once with ``p4 users`` and cached.  Unknown
        users get a placeholder record.
        '''
        if self._users is None:
            users = {}
            for record in self._p4_records(('-G', 'users')):
                if 'User' not in record:
                    continue
                # Key each record by its own user name, not by the name
                # that happened to be asked for.
                users[record['User']] = {
                    'email': record.get('Email', ''),
                    'name': record.get('FullName', record['User'])}
            self._users = users
        try:
            return self._users[username]
        except KeyError:
            return {'email': 'Not A Current P4 User', 'name': username}

    def changesets(self, start):
        '''Yield a _Changeset for every change numbered above *start*.

        Changes are taken from ``p4 changes`` below ``config['root']`` and
        yielded oldest first.  Raises RuntimeError when p4 reports an error.
        '''
        if not self.config['root'].endswith('/'):
            self.config['root'] += '/'
        records = self._p4_records(
            ('-G', 'changes', '-l', '-t', self.config['root'] + '...'))
        self._changesets = []
        for record in records:
            if record.get('code') == 'error':
                raise RuntimeError(
                    'p4 changes failed: %s' % (record.get('data'),))
            self._changesets.append(
                (int(record['change']), int(record['time'])))
        # Oldest first; the change number orders changes submitted within
        # the same second.
        self._changesets.sort(key=lambda item: (item[1], item[0]))
        for change, when in self._changesets:
            if change > start:
                yield self._Changeset(self, change, when)

    class _File:
        '''One file revision of a changeset.

        *details* holds the per-file fields of ``p4 describe`` (``'file'``
        is the depot path without the leading ``//``, plus ``'action'``,
        ``'rev'``, ...).  For a file that maps onto a configured branch and
        is not a deletion, the size is looked up and a ``p4 print`` process
        is started whose output is exposed as ``read``.
        '''

        def __init__(self, connection, details):
            self._connection = connection
            self._details = details
            self.action = self._details['action']
            self.size = 0
            self.read = None
            self._p = None

            mapping = on_branch(self._connection.config, self.p4name())
            if mapping is None:
                self._branch_name = None
                self._orig_branch = None
                self._name = None
            else:
                self._branch_name, self._orig_branch, self._name = mapping

            # Only files that will actually be imported are worth two more
            # p4 processes.
            if self._branch_name is None or self.action in _DELETE_ACTIONS:
                return

            try:
                sizes = self._connection._p4_records(
                    ('-G', 'sizes', self.p4name()), 1)
                self.size = int(sizes[0]['fileSize'])
            except (IndexError, KeyError, TypeError, ValueError, EOFError):
                # No usable size: treat the revision as gone.
                self.size = 0
                self.action = 'purge'
                return

            self._p = self._connection._p4(('print', '-q', self.p4name()))
            self.read = self._p.stdout.read
            oldflags = fcntl.fcntl(self._p.stdout, fcntl.F_GETFL)
            fcntl.fcntl(
                self._p.stdout, fcntl.F_SETFL, oldflags | os.O_NONBLOCK)

        def p4name(self):
            '''Return the revision specifier ``//<depot path>#<rev>``.'''
            return '//' + self._details['file'] + '#' + self.rev()

        def branch(self):
            '''Return the git branch of this file, or None.'''
            return self._branch_name

        def orig_branch(self):
            '''Return the branch that branch() should be started from.'''
            return self._orig_branch

        def name(self):
            '''Return the path of the file inside its branch, or None.'''
            return self._name

        def is_interesting(self):
            '''Tell whether the file maps onto a configured branch.'''
            return self._branch_name is not None

        def rev(self):
            '''Return the revision number as reported by p4.'''
            return self._details['rev']

        def tag(self):
            '''Write a description of the file and return its 'tag' field.'''
            sys.stdout.write('%s\n' % (self,))
            return self._details['tag']

        def __str__(self):
            return '\t'.join(
                [key + ' ' + self._details[key]
                 for key in self._details.keys()])

        def __del__(self):
            if getattr(self, 'read', None) is not None:
                self.close_files()

        def close_files(self):
            '''Close the ``p4 print`` pipes, if any; safe to call twice.'''
            self.read = None
            proc = getattr(self, '_p', None)
            self._p = None
            if proc is not None:
                _reap(proc)

    class _Changeset:
        '''Description and file list of one Perforce changeset.'''

        def __init__(self, connection, number, time):
            self.number = number
            self._time = time
            self._connection = connection
            self._desc = {}
            self._files = {}
            # -s: only the summary is used, so do not ask for the diffs.
            records = self._connection._p4_records(
                ('-G', 'describe', '-s', str(self.number)), 1)
            if not records:
                return
            described = records[0]
            for key in described.keys():
                # Keys ending in digits ('depotFile0', 'action0', ...)
                # refer to a particular file; the rest describes the change.
                name = key.rstrip('0123456789')
                if name == key:
                    self._desc[key] = described[key]
                    continue
                index = int(key[len(name):])
                entry = self._files.setdefault(index, {})
                # TODO: Determine which branch(es) this file belongs to.
                if name == 'depotFile':
                    entry['file'] = described[key][2:]
                else:
                    entry[name] = described[key]

        def __str__(self):
            return 'Changeset: %s Time: %s' % (self.number, self._time)

        def _submitter(self):
            '''Return the name to look the author up by.

            ``p4 describe`` reports the submitting user as 'user'; 'client'
            is the workspace and is only used when 'user' is absent.
            '''
            return self._desc.get('user', self._desc['client'])

        def commit_msg(self):
            '''Return the changeset description.'''
            return self._desc['desc']

        def author(self):
            '''Return the author's full name followed by the quoted client.'''
            full_name = self._connection.user(self._submitter())['name']
            return full_name + " '" + self._desc['client'] + "'"

        def email(self):
            '''Return the author's e-mail address.'''
            return self._connection.user(self._submitter())['email']

        def time(self):
            '''Return the submission time passed to the constructor.'''
            return self._time

        def files(self):
            '''Yield a _File for every file of the changeset, in p4 order.'''
            for number in sorted(self._files):
                yield self._connection._File(
                    self._connection, self._files[number])


_seen = None
_tree = None


def generate_tree():
    '''Reads the current git repo and iterates over every branch, reading
    all directories, in order to ensure that file case does not ever change.

    Fills the global ``_tree``, which maps each lower-cased directory path
    to the spelling of its last component as found in the repository.
    '''
    global _tree

    def directories(tree_ish, prefix=''):
        # One ``git ls-tree`` per directory; yields every directory path
        # (without trailing slash) below *tree_ish*.
        for line in _git_lines('ls-tree', tree_ish):
            fields = line.strip().split(' ', 2)
            if len(fields) != 3 or '\t' not in fields[2]:
                continue
            sha1, name = fields[2].split('\t', 1)
            if fields[1].strip() != 'tree':
                continue
            path = prefix + name
            yield path
            for sub in directories(sha1.strip(), path + '/'):
                yield sub

    tree = {}
    for branch in _local_branches():
        for path in directories(branch):
            tree[path.lower()] = path.split('/')[-1]
    _tree = tree


def mangle_case(config, file):
    '''Return *file* with its directories spelled as first seen.

    Every directory prefix of *file* is looked up case-insensitively in the
    global ``_tree``; a known directory is replaced by its recorded
    spelling, an unknown one is recorded.  The file name itself is left
    alone.
    '''
    if _tree is None:
        generate_tree()
    components = file.split('/')
    for i in range(len(components) - 1):
        key = '/'.join(components[:i + 1]).lower()
        if key in _tree:
            components[i] = _tree[key]
        else:
            # Record under the lower-cased key: that is what lookups use.
            _tree[key] = components[i]
    return '/'.join(components)


def on_branch(config, p4_filename):
    '''Map a Perforce revision specifier onto a git branch.

    Each entry of ``config['branches']`` has the form ``name=regex`` where
    the regex has two groups: the branch name and the path inside the
    branch.  The first matching entry wins.

    Returns ``(branch, origin branch, path)``, or None when no entry
    matches.  The origin is the branch itself once the branch has been seen,
    otherwise the name of the first configured entry.
    '''
    # TODO: Need to change this so that:
    #  * If desired, prevent changes in case, either by simply preventing
    #    changes in case, or also by using an 'authoritative' perforce
    #    changeset.
    #  * Stop using the stupid global for _seen, and consult the repo.
    #    (Currently it's worse than nothing, because
    #    it stuffs up the parent commits.)
    import re

    global _seen
    if _tree is None:
        generate_tree()
    if _seen is None:
        _seen = dict.fromkeys(_local_branches())

    first = None
    for spec in config.get('branches') or ():
        name, pattern = spec.split('=', 1)
        if first is None:
            first = name
        m = re.match('^' + pattern + r'\#.*$', p4_filename)
        if m:
            branch, path = m.groups()
            if branch in _seen:
                return branch, branch, mangle_case(config, path)
            _seen[branch] = None
            return branch, first, mangle_case(config, path)
    return None


def format_tag(config, branch, number):
    '''Return the tag name ``<branch>/<number>``.'''
    return branch + '/' + str(number)


def unformat_tag(config, tag):
    '''Split a tag made by format_tag() into ``(branch, number)``.

    Raises ValueError when *tag* does not end in ``/<integer>``.
    '''
    branch, number = tag.rsplit('/', 1)
    return branch, int(number)


if __name__ == '__main__':
    main()