#!/usr/bin/python
# git-split: Split the history of a git repository by subdirectories and ranges
# Copyright (C) 2006 Jamey Sharp, Josh Triplett
#
# You can redistribute this software and/or modify it under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

try:
    from itertools import izip
except ImportError:
    # Python 3 has no izip; the builtin zip is already lazy there.
    izip = zip
from subprocess import Popen, PIPE
import os, sys


def run(cmd, stdin=None, env=None):
    """Run cmd and return everything it wrote to stdout as a str.

    cmd   -- argument list handed to Popen (no shell involved).
    stdin -- optional text fed to the command's standard input.
    env   -- optional dict of extra environment variables layered on top
             of a copy of os.environ.

    The exit status of the command is not checked; a failing command
    typically shows up as empty output.
    """
    newenv = os.environ.copy()
    if env:
        newenv.update(env)
    # On Python 3 the pipes carry bytes.  surrogateescape lets bytes that
    # are not valid UTF-8 survive a decode/encode round trip unchanged.
    python3 = str is not bytes
    if python3 and isinstance(stdin, str):
        stdin = stdin.encode("utf-8", "surrogateescape")
    process = Popen(cmd, stdin=PIPE, stdout=PIPE, env=newenv)
    output = process.communicate(stdin)[0]
    if python3:
        output = output.decode("utf-8", "surrogateescape")
    return output


def parse_author_date(s):
    """Given a GIT author or committer string, return (name, email, date).

    s has the form "Name <email> timestamp timezone".  The two trailing
    fields are split off first and the identity is then divided at its
    last "<", so an empty name or whitespace inside the address does not
    break the parse.  The returned date is "timestamp timezone".
    """
    ident, time, timezone = s.rsplit(None, 2)
    name, _, email = ident.rpartition("<")
    if email.endswith(">"):
        email = email[:-1]
    return (name.strip(), email.strip(), time + " " + timezone)


def get_subtree(tree, name):
    """Return the object hash of entry `name` inside `tree`, or None.

    None is returned when `git ls-tree` prints nothing for that path.
    """
    # With a trailing slash ls-tree lists the directory's *contents*
    # instead of the directory entry itself, which would make us pick up
    # the hash of some file inside it.
    path = name.rstrip("/") or name
    output = run(["git", "ls-tree", tree, path])
    if not output:
        return None
    # Output format: "<mode> <type> <hash>\t<path>".
    return output.split()[2]


def is_ancestor(new_commits, cur, other):
    """Return True if cur has other as an ancestor, or False otherwise.

    new_commits is not used; it is kept so existing callers keep working.
    """
    return run(["git", "merge-base", cur, other]).strip() == other


def _rewrite_commit(commits, new_commits, commit_hash):
    """Compute and record the rewritten hash for one commit.

    Every parent of the commit must already carry "new_hash", and the
    commit itself must already carry "new_tree".
    """
    commit = commits[commit_hash]
    tree = commit["new_tree"]

    # De-duplicate rewritten parents while keeping their original order,
    # so the first parent of a merge stays the first parent.
    new_parents = []
    for parent in commit["parents"]:
        parent_hash = commits[parent]["new_hash"]
        if parent_hash not in new_parents:
            new_parents.append(parent_hash)

    new_hash = None
    if len(new_parents) == 1:
        new_hash = new_parents[0]
    elif len(new_parents) == 2:
        # Check for unnecessary merge
        if is_ancestor(new_commits, new_parents[0], new_parents[1]):
            new_hash = new_parents[0]
        elif is_ancestor(new_commits, new_parents[1], new_parents[0]):
            new_hash = new_parents[1]
    # Reusing a parent is only valid if this commit left the subtree
    # untouched relative to that parent.
    if new_hash and new_commits[new_hash]["new_tree"] != tree:
        new_hash = None

    if not new_hash:
        args = ["git", "commit-tree", tree]
        for new_parent in new_parents:
            args.extend(["-p", new_parent])
        # Carry the original author/committer identity and dates over.
        env = {}
        for role in ("author", "committer"):
            values = parse_author_date(commit[role])
            for suffix, value in zip(("NAME", "EMAIL", "DATE"), values):
                env["GIT_%s_%s" % (role.upper(), suffix)] = value
        new_hash = run(args, commit["message"], env).strip()
        if not new_hash:
            raise Exception("git commit-tree failed for commit " + commit_hash)

    commit["new_parents"] = new_parents
    commit["new_hash"] = new_hash
    if new_hash not in new_commits:
        new_commits[new_hash] = commit


def walk(commits, new_commits, commit_hash, project):
    """Rewrite commit_hash and all its ancestors; return the new hash.

    commits     -- dict mapping original hash -> commit dict (with at least
                   "tree", "parents", "author", "committer", "message").
                   Each processed commit gains "new_tree", "new_parents"
                   and "new_hash"; a commit that already has "new_hash"
                   is not processed again.
    new_commits -- dict mapping rewritten hash -> commit dict, filled in
                   as commits are created.
    project     -- path of the subdirectory to extract.

    Raises Exception if the subdirectory is missing from some commit's
    tree, if the parent links form a cycle, or if git commit-tree
    produces no hash.

    The traversal uses an explicit stack rather than recursion so that
    long histories do not exceed the interpreter's recursion limit.
    Parents are still handled depth-first, first parent first.
    """
    expanded = set()  # commits whose parents have been pushed on the stack
    stack = [commit_hash]
    while stack:
        current = stack[-1]
        commit = commits[current]
        if "new_hash" in commit:
            stack.pop()
            continue

        if current not in expanded:
            expanded.add(current)
            tree = get_subtree(commit["tree"], project)
            commit["new_tree"] = tree
            if not tree:
                raise Exception("Did not find project in tree for commit " + current)
            pending = [parent for parent in commit["parents"]
                       if "new_hash" not in commits[parent]]
            if pending:
                for parent in pending:
                    # An expanded but unfinished commit is still waiting
                    # below us on the stack, so it is its own ancestor.
                    if parent in expanded:
                        raise Exception("Cycle in commit graph at commit " + parent)
                # Reverse so the first parent ends up on top of the stack.
                stack.extend(reversed(pending))
                continue

        _rewrite_commit(commits, new_commits, current)
        stack.pop()
    return commits[commit_hash]["new_hash"]


def _read_grafts(path):
    """Return {child: [parents]} read from a grafts file; {} if unreadable."""
    grafts = {}
    try:
        with open(path) as grafts_file:
            for line in grafts_file:
                fields = line.split()
                # Skip blank lines and comments.
                if not fields or fields[0].startswith("#"):
                    continue
                # A line naming only the child grafts it as a root commit.
                grafts[fields[0]] = fields[1:]
    except IOError:
        pass
    return grafts


def _parse_raw_log(log_text, grafts, oldest_hash):
    """Parse `git log --pretty=raw` output into {hash: commit dict}.

    Grafts override the recorded parents, and the commit whose hash is
    oldest_hash (if any) is turned into a root by dropping its parents.
    """
    # Header blocks and indented messages alternate, separated by an
    # empty line.
    chunks = log_text.split("\n\n")
    commits = {}
    for headers, message in izip(chunks[0::2], chunks[1::2]):
        commit = {}
        commit_hash = None
        for header in headers.split("\n"):
            # Lines starting with a space continue a multi-line header
            # value (e.g. a signature); they carry no key of their own.
            if not header or header.startswith(" "):
                continue
            fields = header.split(None, 1)
            key = fields[0]
            value = fields[1] if len(fields) > 1 else ""
            if key == "parent":
                commit.setdefault("parents", []).append(value)
            elif key == "commit":
                commit_hash = value
            else:
                if key in commit:
                    raise Exception('Duplicate key "%s"' % key)
                commit[key] = value
        # Message lines are indented by four spaces.  The final chunk of
        # the log ends with a newline that is not part of the message.
        commit["message"] = "".join(
            [line[4:] + "\n" for line in message.rstrip("\n").split("\n")])
        if commit_hash is None:
            raise Exception("Commit without hash")
        if commit_hash in grafts:
            commit["parents"] = grafts[commit_hash]
        if commit_hash == oldest_hash or "parents" not in commit:
            commit["parents"] = []
        commits[commit_hash] = commit
    return commits


def main(args):
    """Entry point: git-split subdir [newest [oldest]].

    Prints the hash of the rewritten `newest` commit.  Returns 1 after
    printing a usage message when the argument count is wrong, and None
    otherwise.
    """
    if not (1 <= len(args) <= 3):
        print("Usage: git-split subdir [newest [oldest]]")
        return 1

    project = args[0]
    newest = args[1] if len(args) > 1 else "HEAD"
    newest_hash = run(["git", "rev-parse", newest]).strip()

    oldest_hash = None
    if len(args) > 2:
        oldest_hash = run(["git", "rev-parse", args[2]]).strip()

    grafts = _read_grafts(".git/info/grafts")
    log_text = run(["git", "log", "--pretty=raw", newest_hash])
    commits = _parse_raw_log(log_text, grafts, oldest_hash)

    print(walk(commits, dict(), newest_hash, project))


# Psyco is optional; carry on without it when it is not installed.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))