git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dmitry Ivankov <divanorama@gmail.com>
To: git@vger.kernel.org
Cc: Jonathan Nieder <jrnieder@gmail.com>,
	"Shawn O. Pearce" <spearce@spearce.org>,
	David Barr <davidbarr@google.com>,
	Dmitry Ivankov <divanorama@gmail.com>
Subject: [PATCH/WIP 7/7] fast-import: fix data corruption in load_tree
Date: Thu, 28 Jul 2011 10:46:10 +0600	[thread overview]
Message-ID: <1311828370-30477-8-git-send-email-divanorama@gmail.com> (raw)
In-Reply-To: <1311828370-30477-1-git-send-email-divanorama@gmail.com>

load_tree could be used to load a tree having different base and
current sha1. For example, it can happen after a parent tree was
set by sha1 (its tree becomes NULL, versions[0].sha1 remains and
versions[1].sha1 changes). But it doesn't look at versions[0].sha1
and just loads a new version resetting the base one to the new one.
This corrupts parent tree delta.

Try to detect that case. Load both base and new trees and merge them
together so that mktree is able to produce both base and new trees
correctly.

There may still be delta data corruption. For example, tree_content_set
with subtree != NULL can produce a mismatch between the bases of the
subtree's entries and the base of the subtree's new parent.
tree_content_set is used in file_modify_cr, which copies and moves trees
by name. Another place is the notes-writing code, which does some tree
magic too.

Signed-off-by: Dmitry Ivankov <divanorama@gmail.com>
---
 fast-import.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 14a2a63..feccd14 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1389,14 +1389,6 @@ static void load_tree_content(struct tree_content **root, unsigned char *sha1)
 	free(buf);
 }
 
-static void load_tree(struct tree_entry *root)
-{
-	root->tree = t = new_tree_content(8);
-	if (is_null_sha1(sha1))
-		return;
-       load_tree_content(&root->tree, root->versions[1].sha1);
-}
-
 static int tecmp0 (const void *_a, const void *_b)
 {
 	struct tree_entry *a = *((struct tree_entry**)_a);
@@ -1442,6 +1434,66 @@ static void mktree(struct tree_content *t, int v, struct strbuf *b)
 	}
 }
 
+static void load_tree(struct tree_entry *root)
+{
+	struct tree_content *oldt;
+	size_t n, i, j;
+
+	root->tree = new_tree_content(8);
+	if (is_null_sha1(root->versions[1].sha1)) {
+		if (!S_ISDIR(root->versions[0].mode) || is_null_sha1(root->versions[0].sha1) || !hashcmp(root->versions[0].sha1, root->versions[1].sha1))
+			return;
+		// looks like it is currently unreachable, but let it be for a while
+		load_tree_content(&root->tree, root->versions[0].sha1);
+		for (i = 0; i < root->tree->entry_count; ++i) {
+			root->tree->entries[i]->versions[1].mode = 0;
+			hashclr(root->tree->entries[i]->versions[1].sha1);
+		}
+		return;
+	}
+
+	load_tree_content(&root->tree, root->versions[1].sha1);
+	if (!S_ISDIR(root->versions[0].mode) || is_null_sha1(root->versions[0].sha1) || !hashcmp(root->versions[0].sha1, root->versions[1].sha1))
+ 		return;
+
+	oldt = new_tree_content(8);
+	load_tree_content(&oldt, root->versions[0].sha1);
+
+	qsort(root->tree->entries, root->tree->entry_count, sizeof(root->tree->entries[0]), tecmp1);
+	qsort(oldt->entries, oldt->entry_count, sizeof(oldt->entries[0]), tecmp1);
+
+	n = root->tree->entry_count;
+	i = 0;
+	j = 0;
+	while (i < n || j < oldt->entry_count) {
+		int cmp = i == n ? 1 : j == oldt->entry_count ? -1 : tecmp1(root->tree->entries + i, oldt->entries + j);
+		if (cmp > 0) {
+			if (root->tree->entry_count == root->tree->entry_capacity)
+				root->tree = grow_tree_content(root->tree, root->tree->entry_count);
+			oldt->entries[j]->versions[1].mode = 0;
+			hashclr(oldt->entries[j]->versions[1].sha1);
+			root->tree->entries[root->tree->entry_count++] = oldt->entries[j];
+			oldt->entries[j] = NULL;
+			++j;
+		} else if (cmp < 0) {
+			root->tree->entries[i]->versions[0].mode = 0;
+			hashclr(root->tree->entries[i]->versions[0].sha1);
+			++i;
+		} else {
+			root->tree->entries[i]->versions[0].mode = oldt->entries[j]->versions[1].mode;
+			hashcpy(root->tree->entries[i]->versions[0].sha1, oldt->entries[j]->versions[1].sha1);
+			++i;
+			++j;
+		}
+	}
+	for (j = 0; j < oldt->entry_count; ++j)
+		if (oldt->entries[j]) {
+			release_tree_entry(oldt->entries[j]);
+			oldt->entries[j] = NULL;
+		}
+	release_tree_content(oldt);
+}
+
 static void drop_old(struct tree_entry *root)
 {
 	struct tree_content *t = root->tree;
-- 
1.7.3.4

      parent reply	other threads:[~2011-07-28  4:43 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-28  4:46 [PATCH/WIP 0/7] was: long fast-import errors out "failed to apply delta" Dmitry Ivankov
2011-07-28  4:46 ` [PATCH/WIP 1/7] fast-import: extract object preparation function Dmitry Ivankov
2011-07-28  4:46 ` [PATCH/WIP 2/7] fast-import: be saner with temporary trees Dmitry Ivankov
2011-07-28  7:27   ` Jonathan Nieder
2011-07-28  4:46 ` [PATCH/WIP 3/7] fast-import: fix a data corruption in parse_ls Dmitry Ivankov
2011-07-28  7:34   ` Jonathan Nieder
2011-07-28  4:46 ` [PATCH/WIP 4/7] fast-import: fix data corruption in store_tree Dmitry Ivankov
2011-07-28  7:42   ` Jonathan Nieder
2011-07-28  8:11     ` Dmitry Ivankov
2011-07-28  4:46 ` [PATCH/WIP 5/7] fast-import: extract tree_content reading function Dmitry Ivankov
2011-07-28  4:46 ` [PATCH/WIP 6/7] fast-import: workaround data corruption Dmitry Ivankov
2011-07-28  6:31   ` Jonathan Nieder
2011-07-28  4:46 ` Dmitry Ivankov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1311828370-30477-8-git-send-email-divanorama@gmail.com \
    --to=divanorama@gmail.com \
    --cc=davidbarr@google.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    --cc=spearce@spearce.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).