From: David Barr <david.barr@cordelta.com>
To: Git Mailing List <git@vger.kernel.org>
Cc: Jonathan Nieder <jrnieder@gmail.com>,
Sverre Rabbelier <srabbelier@gmail.com>,
Ramkumar Ramachandra <artagnon@gmail.com>,
David Barr <david.barr@cordelta.com>
Subject: [PATCH] contrib/svn-fe: Fast script to remap svn history
Date: Thu, 7 Oct 2010 17:06:01 +1100 [thread overview]
Message-ID: <1286431561-24126-1-git-send-email-david.barr@cordelta.com> (raw)
This Python script walks the commit sequence imported by svn-fe.
For each commit, it tries to identify the branch that was changed.
Each commit is rewritten so that its tree is rooted at that branch, following the standard trunk/branches/tags layout.
A basic heuristic of matching trees is used to find parents for the
first commit in a branch and for tags.
Signed-off-by: David Barr <david.barr@cordelta.com>
---
contrib/svn-fe/svn-filter-root.py | 107 +++++++++++++++++++++++++++++++++++++
fast-import.c | 9 +++
2 files changed, 116 insertions(+), 0 deletions(-)
create mode 100755 contrib/svn-fe/svn-filter-root.py
diff --git a/contrib/svn-fe/svn-filter-root.py b/contrib/svn-fe/svn-filter-root.py
new file mode 100755
index 0000000..72d248f
--- /dev/null
+++ b/contrib/svn-fe/svn-filter-root.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+from subprocess import *
+import re
+import os
+
+subroot_re = re.compile("^trunk|^branches/[^/]*|^tags/[^/]*")
+
+tree_re = re.compile("^tree ([0-9a-f]{40})", flags=re.MULTILINE)
+parent_re = re.compile("^parent ([0-9a-f]{40})", flags=re.MULTILINE)
+author_re = re.compile("^author (.*)$", flags=re.MULTILINE)
+committer_re = re.compile("^committer (.*)$", flags=re.MULTILINE)
+
+git_svn_id_re = re.compile("^git-svn-id[^@]*", flags=re.MULTILINE)
+
+ref_commit = {}
+tree_commit = {}
+count = 1
+
+# Long-lived "git cat-file --batch-check" process; it is asked for "<commit>:<subroot>" once per commit
+subtree_process = Popen(["git","cat-file","--batch-check"], stdin=PIPE, stdout=PIPE)
+
+# Stream the svn-fe import: rev-list (oldest first, from HEAD) feeds commit ids to "cat-file --batch"
+revlist = Popen(["git","rev-list","--reverse","--topo-order","--default","HEAD"], stdout=PIPE)
+cat_file = Popen(["git","cat-file","--batch"], stdin=revlist.stdout, stdout=PIPE)
+object_header = cat_file.stdout.readline().strip().split(" ");
+while len(object_header) == 3:
+ object_body = cat_file.stdout.read(int(object_header[2]))
+ cat_file.stdout.read(1)
+ git_commit = object_header[0]
+ (commit_header, blank_line, commit_message) = object_body.partition("\n\n")
+ object_header = cat_file.stdout.readline().strip().split(" ");
+
+ author = author_re.search(commit_header).group()
+ committer = committer_re.search(commit_header).group()
+
+ # A commit without a "parent" header line is diffed against git's empty tree id instead
+ match = parent_re.search(commit_header)
+ if match:
+ parent = match.group(1)
+ else:
+ parent = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
+
+ # Use the trunk, branches/<name> or tags/<name> prefix of the first changed path that has one
+ subroot = ""
+ changes = Popen(["git","diff","--name-only",parent,git_commit], stdout=PIPE)
+ for path in changes.stdout:
+ match = subroot_re.match(path)
+ if match:
+ subroot = match.group()
+ changes.terminate()
+ break
+
+ # Attempt to re-emit the commit as fast-import commands on top of the matching branch
+ if subroot == "":
+ print "progress Weird commit - no subroot."
+ else:
+ # Append "/<subroot>" to the text before the '@' on the git-svn-id line of the log message
+ commit_message = git_svn_id_re.sub('\g<0>/'+subroot, commit_message)
+ subtree_process.stdin.write(git_commit+":"+subroot+"\n")
+ subtree_process.stdin.flush()
+ subtree_line = subtree_process.stdout.readline()
+ if re.match("^.*missing$", subtree_line):
+ print "progress Weird commit - invalid subroot"
+ continue
+ subtree = subtree_line[0:40]
+ # Build the ref as refs/heads/<subroot>, with each space in the svn name encoded as %20
+ ref = "refs/heads/" + re.sub(" ", "%20", subroot)
+ # Parent: the last commit recorded for this ref, else the latest commit recorded with the same tree
+ if ref in ref_commit:
+ parent = ref_commit[ref]
+ elif subtree in tree_commit:
+ parent = tree_commit[subtree]
+ else:
+ parent = ""
+ # Refs under refs/heads/tags/ are emitted as a "tag" command pointing at the chosen parent
+ if re.match("^refs/heads/tags/", ref):
+ if parent == "":
+ print "progress Weird tag - no matching commit."
+ else:
+ tagname = ref[16:]
+ print "tag "+tagname
+ print "from "+parent
+ print "tagger "+committer[10:]
+ print "data "+str(len(commit_message))
+ print commit_message
+ else:
+ # With no ref or tree match, fall back to the last trunk commit as parent when one exists
+ if parent == "" and "refs/heads/trunk" in ref_commit:
+ parent = ref_commit["refs/heads/trunk"]
+ print "commit "+ref
+ print "mark :"+str(count)
+ print author
+ print committer
+ print "data "+str(len(commit_message))
+ print commit_message
+ if parent != "":
+ print "from "+parent
+ print "M 040000 "+subtree+" \"\""
+ commit = ":"+str(count)
+ # Remember this mark as the newest commit for the ref
+ ref_commit[ref] = commit
+ # Record the mark under its tree id so that later parent lookups by tree can find it
+ tree_commit[subtree] = commit
+ print "progress " + str(count)
+ count = count + 1
+
+subtree_process.terminate()
diff --git a/fast-import.c b/fast-import.c
index 2317b0f..8f68a89 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1454,6 +1454,15 @@ static int tree_content_set(
n = slash1 - p;
else
n = strlen(p);
+ if (!slash1 && !n) {
+ if (!S_ISDIR(mode))
+ die("Root cannot be a non-directory");
+ hashcpy(root->versions[1].sha1, sha1);
+ if (root->tree)
+ release_tree_content_recursive(root->tree);
+ root->tree = subtree;
+ return 1;
+ }
if (!n)
die("Empty path component found in input");
if (!slash1 && !S_ISDIR(mode) && subtree)
--
1.7.3.4.g45608.dirty
next reply other threads:[~2010-10-07 6:07 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-07 6:06 David Barr [this message]
2010-10-07 6:29 ` [PATCH] contrib/svn-fe: Fast script to remap svn history Sverre Rabbelier
2010-10-07 7:17 ` David Michael Barr
2010-10-07 8:28 ` Jonathan Nieder
2010-11-21 5:17 ` Jonathan Nieder
2010-11-22 14:01 ` Stephen Bash
2010-11-22 17:42 ` Jonathan Nieder
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1286431561-24126-1-git-send-email-david.barr@cordelta.com \
--to=david.barr@cordelta.com \
--cc=artagnon@gmail.com \
--cc=git@vger.kernel.org \
--cc=jrnieder@gmail.com \
--cc=srabbelier@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).