git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Shawn O. Pearce" <spearce@spearce.org>
To: David Frech <david@nimblemachines.com>
Cc: "Uwe Kleine-König" <ukleinek@informatik.uni-freiburg.de>,
	git@vger.kernel.org
Subject: Re: [PATCH] Support wholesale directory renames in fast-import
Date: Wed, 11 Jul 2007 03:57:44 -0400	[thread overview]
Message-ID: <20070711075744.GO4436@spearce.org> (raw)
In-Reply-To: <7154c5c60707101255k7fcd207fg62f1c59518ba5039@mail.gmail.com>

David Frech <david@nimblemachines.com> wrote:
> Hmm. I think Uwe is right. Copy is probably the "right" primitive, and
> rename can always be synthesized from copy+delete.
> 
> Since Subversion is built around the idea of "cheap copies" there is
> no incentive for them to represent renames other than as "copy, then
> delete".
> 
> But isn't the same true in a way of git? If I copy a directory (a
> tree), then the new tree is the same tree - it has the same SHA-1
> hash, so I can simply refer to the existing object. Same for file
> blobs.
> 
> Subversion dump files have *lots* of copies. Might be nice to be able
> to feed these directly into fast-import and have it DTRT, esp if it
> was smart about sharing identical data structures.

Yes.  All of that is true.  ;-)

I'm tired.  I just worked an 18 hour day.  I need to go do it all
over again in about 4 hours.  So I'm going to head off to bed.  But
I did manage to implement this (I think).  It's totally untested.
But feel free to poke at it:

  git://repo.or.cz/git/fastimport.git copy-wip

I'll write documentation and unit tests tomorrow.  And fix any bugs,
if any get identified.

The implementation should copy as little memory as possible to do the
actual copy.  This should make a C/D pair about as efficient as an
R command if the directory being copied has not yet been modified
as part of the current commit (this is probably typical for an
SVN dump file).  The only difference should be a slight increase
in running time for the C/D pair, as directory entry lookup in
fast-import is O(n).

Oh, and as always, C works with both files and directories...

Hmm.  Quickly reading this diff I can actually do it shorter with
a bit of refactoring.  I'll clean that up tomorrow night.  I blame
it on the lack of sleep that I'm suffering from right now.  ;-)


-->8--
WIP Teach fast import how to copy

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 fast-import.c |  121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 120 insertions(+), 1 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index a1cb13f..41c0352 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -26,10 +26,16 @@ Format of STDIN stream:
     lf;
   commit_msg ::= data;
 
-  file_change ::= file_clr | file_del | file_rnm | file_obm | file_inm;
+  file_change ::= file_clr
+    | file_del
+    | file_rnm
+    | file_cpy
+    | file_obm
+    | file_inm;
   file_clr ::= 'deleteall' lf;
   file_del ::= 'D' sp path_str lf;
   file_rnm ::= 'R' sp path_str sp path_str lf;
+  file_cpy ::= 'C' sp path_str sp path_str lf;
   file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
   file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
     data;
@@ -623,6 +629,33 @@ static void release_tree_entry(struct tree_entry *e)
 	avail_tree_entry = e;
 }
 
+static struct tree_content *dup_tree_content(struct tree_content *s)
+{
+	struct tree_content *d;
+	struct tree_entry *a, *b;	/* a: entry in s, b: its private copy */
+	unsigned int i, j;	/* i indexes s, j counts entries kept in d */
+	/* Duplicate s: each kept entry, and its in-memory subtree if any, is copied. NULL in, NULL out. */
+	if (!s)
+		return NULL;
+	d = new_tree_content(s->entry_count);
+	for (i = 0, j = 0; i < s->entry_count; i++) {
+		a = s->entries[i];
+		if (a->versions[1].mode) {	/* entries whose current mode is zero are skipped */
+			b = new_tree_entry();
+			memcpy(b, a, sizeof(*a));
+			if (is_null_sha1(b->versions[1].sha1))
+				b->tree = dup_tree_content(b->tree);	/* recurse so the subtree is not shared */
+			else
+				b->tree = NULL;
+			d->entries[j++] = b;	/* keep the copy; storing a would alias s and leak b */
+		}
+	}
+	d->entry_count = j;
+	d->delta_depth = s->delta_depth;
+
+	return d;
+}
+
 static void start_packfile(void)
 {
 	static char tmpfile[PATH_MAX];
@@ -1273,6 +1306,43 @@ del_entry:
 	return 1;
 }
 
+static int tree_content_get(
+	struct tree_entry *root,
+	const char *p,
+	struct tree_entry *leaf)
+{
+	struct tree_content *t = root->tree;	/* NOTE(review): used below without a NULL check; confirm callers load it */
+	const char *slash1;	/* first '/' in p, or NULL when p is the last component */
+	unsigned int i, n;	/* n: length of the first path component of p */
+	struct tree_entry *e;
+	/* Find path p below root; on a hit copy the entry into *leaf and return 1, otherwise return 0. */
+	slash1 = strchr(p, '/');
+	if (slash1)
+		n = slash1 - p;
+	else
+		n = strlen(p);
+
+	for (i = 0; i < t->entry_count; i++) {	/* linear scan of this level's entries */
+		e = t->entries[i];
+		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
+			if (!slash1) {	/* last component: this entry is the result */
+				memcpy(leaf, e, sizeof(*leaf));
+				if (is_null_sha1(e->versions[1].sha1))
+					leaf->tree = dup_tree_content(leaf->tree);	/* leaf gets its own copy of the subtree */
+				else
+					leaf->tree = NULL;
+				return 1;
+			}
+			if (!S_ISDIR(e->versions[1].mode))
+				return 0;	/* more path remains but this entry is not a directory */
+			if (!e->tree)
+				load_tree(e);	/* presumably fills in e->tree; load_tree is defined elsewhere */
+			return tree_content_get(e, slash1 + 1, leaf);	/* descend with the rest of the path */
+		}
+	}
+	return 0;	/* nothing at this level matched the component */
+}
+
 static int update_branch(struct branch *b)
 {
 	static const char *msg = "fast-import";
@@ -1706,6 +1776,53 @@ static void file_change_r(struct branch *b)
 	free(d_uq);
 }
 
+static void file_change_c(struct branch *b)
+{
+	const char *s, *d;	/* source and destination paths */
+	char *s_uq, *d_uq;	/* path buffers handed to free() at the end */
+	const char *endp;
+	struct tree_entry leaf;
+	/* Handle a "C <source> <dest>" command line: copy the source entry of b's tree to dest. */
+	s = command_buf.buf + 2;	/* step over the leading "C " */
+	s_uq = unquote_c_style(s, &endp);
+	if (s_uq) {
+		if (*endp != ' ')
+			die("Missing space after source: %s", command_buf.buf);
+	}
+	else {	/* no unquoted result: the source is everything up to the first space */
+		endp = strchr(s, ' ');
+		if (!endp)
+			die("Missing space after source: %s", command_buf.buf);
+		s_uq = xmalloc(endp - s + 1);
+		memcpy(s_uq, s, endp - s);
+		s_uq[endp - s] = 0;
+	}
+	s = s_uq;
+
+	endp++;	/* move past the space separating source from dest */
+	if (!*endp)
+		die("Missing dest: %s", command_buf.buf);
+
+	d = endp;
+	d_uq = unquote_c_style(d, &endp);
+	if (d_uq) {
+		if (*endp)
+			die("Garbage after dest in: %s", command_buf.buf);
+		d = d_uq;
+	}	/* when d_uq is NULL, d keeps pointing into command_buf.buf */
+
+	memset(&leaf, 0, sizeof(leaf));
+	if (!tree_content_get(&b->branch_tree, s, &leaf))
+		die("Path %s not in branch", s);
+	tree_content_set(&b->branch_tree, d,	/* install the copied sha1, mode and tree at dest */
+		leaf.versions[1].sha1,
+		leaf.versions[1].mode,
+		leaf.tree);
+
+	free(s_uq);
+	free(d_uq);
+}
+
 static void file_change_deleteall(struct branch *b)
 {
 	release_tree_content_recursive(b->branch_tree.tree);
@@ -1875,6 +1992,8 @@ static void cmd_new_commit(void)
 			file_change_d(b);
 		else if (!prefixcmp(command_buf.buf, "R "))
 			file_change_r(b);
+		else if (!prefixcmp(command_buf.buf, "C "))
+			file_change_c(b);
 		else if (!strcmp("deleteall", command_buf.buf))
 			file_change_deleteall(b);
 		else
-- 
1.5.3.rc0.879.g64b8


-- 
Shawn.

  reply	other threads:[~2007-07-11  7:57 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-10  1:09 how to do directory renames in fast-import David Frech
2007-07-10  3:10 ` [PATCH] Support wholesale " Shawn O. Pearce
2007-07-10  4:16   ` David Frech
2007-07-10 14:03     ` Uwe Kleine-König
2007-07-10 14:14       ` Shawn O. Pearce
2007-07-10 19:55         ` David Frech
2007-07-11  7:57           ` Shawn O. Pearce [this message]
2007-07-11 23:11             ` David Frech
2007-07-10  8:44   ` Rogan Dawes
2007-07-10 13:55     ` Shawn O. Pearce

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070711075744.GO4436@spearce.org \
    --to=spearce@spearce.org \
    --cc=david@nimblemachines.com \
    --cc=git@vger.kernel.org \
    --cc=ukleinek@informatik.uni-freiburg.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).