From: Jonathan Nieder <jrnieder@gmail.com>
To: Dmitry Ivankov <divanorama@gmail.com>
Cc: git@vger.kernel.org, "Shawn O. Pearce" <spearce@spearce.org>,
David Barr <davidbarr@google.com>
Subject: Re: long fast-import errors out "failed to apply delta"
Date: Thu, 28 Jul 2011 11:56:34 +0200 [thread overview]
Message-ID: <20110728095634.GA6991@elie> (raw)
In-Reply-To: <CA+gfSn8m=_vd91Xe5EnFXUvZnuJf-yUE6H7FU+ak8S8a+NtCjA@mail.gmail.com>
Dmitry Ivankov wrote:
> --- a/fast-import.c
> +++ b/fast-import.c
> @@ -1514,6 +1514,9 @@ static int tree_content_set(
> if (e->tree)
> release_tree_content_recursive(e->tree);
> e->tree = subtree;
> + if (S_ISDIR(mode)) {
> + hashclr(e->versions[0].sha1);
> + }
> hashclr(root->versions[1].sha1);
> return 1;
> }
Based on your later explanations, it looks like this hit the nail on
the head. The idea is that normally e->tree includes entries for both
e->versions[0] and e->versions[1] and that in the "M 040000 ..." case,
this codepath at least currently doesn't have enough information to
decide on a good delta base.
Just doing a "hashclr(e->versions[0].sha1)" will change the preimage
tree object for the parent directory, causing invalid deltas _there_.
So that's not quite right.
Possible fixes:
a. invalidate preimage tree names all the way up the hierarchy.
This is what I misread the code as doing at first. It would
be a cop-out; I think we can do better.
b. merge the preimage and postimage trees in e->tree, and use
the existing preimage tree as delta basis. This would produce
an unnecessarily large delta, but it should work.
c. add a bit to "struct tree_content" (or abuse delta_depth) to
say "my parent entry's versions[0].sha1 has nothing to do with
me; please do not write me out as a delta against it"
d. invalidate preimage tree object names up the hierarchy, and allow
tree_content_set callers to pass a "const struct tree_entry_ms *"
argument indicating a preimage tree object name for the new
subtree.
e. instead of replacing a tree entry, delete it and add it again as a
new tree entry. Make sure internal APIs can deal with multiple
tree entries with the same name.
I find (c) tempting. Like this, vaguely. What do you think?
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
fast-import.c | 35 ++++++++++++++++++++++++++++++-----
1 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/fast-import.c b/fast-import.c
index 9e8d1868..2880af7b 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -170,6 +170,11 @@ Format of STDIN stream:
#define DEPTH_BITS 13
#define MAX_DEPTH ((1<<DEPTH_BITS)-1)
+/*
+ * We abuse the setuid bit on directories to mean "do not delta".
+ */
+#define NO_DELTA S_ISUID
+
struct object_entry {
struct pack_idx_entry idx;
struct object_entry *next;
@@ -1415,8 +1420,9 @@ static void mktree(struct tree_content *t, int v, struct strbuf *b)
struct tree_entry *e = t->entries[i];
if (!e->versions[v].mode)
continue;
- strbuf_addf(b, "%o %s%c", (unsigned int)e->versions[v].mode,
- e->name->str_dat, '\0');
+ strbuf_addf(b, "%o %s%c",
+ (unsigned int)(e->versions[v].mode & ~NO_DELTA),
+ e->name->str_dat, '\0');
strbuf_add(b, e->versions[v].sha1, 20);
}
}
@@ -1426,7 +1432,7 @@ static void store_tree(struct tree_entry *root)
struct tree_content *t = root->tree;
unsigned int i, j, del;
struct last_object lo = { STRBUF_INIT, 0, 0, /* no_swap */ 1 };
- struct object_entry *le;
+ struct object_entry *le = NULL;
if (!is_null_sha1(root->versions[1].sha1))
return;
@@ -1436,7 +1442,8 @@ static void store_tree(struct tree_entry *root)
store_tree(t->entries[i]);
}
- le = find_object(root->versions[0].sha1);
+ if (!(root->versions[0].mode & NO_DELTA))
+ le = find_object(root->versions[0].sha1);
if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
mktree(t, 0, &old_tree);
lo.data = old_tree;
@@ -1470,6 +1477,7 @@ static void tree_content_replace(
{
if (!S_ISDIR(mode))
die("Root cannot be a non-directory");
+ hashclr(root->versions[0].sha1);
hashcpy(root->versions[1].sha1, sha1);
if (root->tree)
release_tree_content_recursive(root->tree);
@@ -1514,6 +1522,23 @@ static int tree_content_set(
if (e->tree)
release_tree_content_recursive(e->tree);
e->tree = subtree;
+
+ /*
+ * We need to leave e->versions[0].sha1 alone
+ * to avoid modifying the preimage tree used
+ * when writing out the parent directory.
+ * But after replacing the subdir with a
+ * completely different one, it's not a good
+ * delta base any more, and besides, we've
+ * thrown away the tree entries needed to
+ * make a delta against it.
+ *
+ * So let's just explicitly disable deltas
+ * for the subtree.
+ */
+ if (S_ISDIR(e->versions[0].mode))
+ e->versions[0].mode |= NO_DELTA;
+
hashclr(root->versions[1].sha1);
return 1;
}
@@ -2928,7 +2953,7 @@ static void print_ls(int mode, const unsigned char *sha1, const char *path)
/* mode SP type SP object_name TAB path LF */
strbuf_reset(&line);
strbuf_addf(&line, "%06o %s %s\t",
- mode, type, sha1_to_hex(sha1));
+ mode & ~NO_DELTA, type, sha1_to_hex(sha1));
quote_c_style(path, &line, NULL, 0);
strbuf_addch(&line, '\n');
}
--
1.7.6
next prev parent reply other threads:[~2011-07-28 9:56 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-07 10:47 long fast-import errors out "failed to apply delta" Dmitry Ivankov
2011-07-07 12:24 ` Dmitry Ivankov
2011-07-23 20:34 ` Dmitry Ivankov
2011-07-26 11:46 ` Dmitry Ivankov
2011-07-26 16:58 ` Jonathan Nieder
2011-07-26 18:22 ` Dmitry Ivankov
2011-07-26 18:55 ` Jonathan Nieder
2011-07-26 21:09 ` Dmitry Ivankov
2011-07-28 9:56 ` Jonathan Nieder [this message]
2011-07-28 11:24 ` Dmitry Ivankov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110728095634.GA6991@elie \
--to=jrnieder@gmail.com \
--cc=davidbarr@google.com \
--cc=divanorama@gmail.com \
--cc=git@vger.kernel.org \
--cc=spearce@spearce.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).