All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nicolas Pitre" <nico@fluxnic.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 18/21] unpack-objects: decode v4 trees
Date: Wed, 11 Sep 2013 13:06:19 +0700	[thread overview]
Message-ID: <1378879582-15372-19-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1378879582-15372-1-git-send-email-pclouds@gmail.com>


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/unpack-objects.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 189 insertions(+), 2 deletions(-)

diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 044a087..9fd5640 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -12,6 +12,7 @@
 #include "decorate.h"
 #include "packv4-parse.h"
 #include "fsck.h"
+#include "varint.h"
 
 static int dry_run, quiet, recover, has_errors, strict;
 static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
@@ -148,6 +149,27 @@ static const unsigned char *read_sha1ref(void)
 	return sha1_table + index * 20;
 }
 
+/* bsearch() comparator: adapts hashcmp() to the (const void *) signature. */
+static int cmp_sha1_table_entry(const void *key, const void *entry)
+{
+	return hashcmp(key, entry);
+}
+
+/*
+ * Die unless "sha1" appears in sha1_table.  Does nothing when packv4
+ * is not set.
+ *
+ * NOTE(review): bsearch() needs the nr_objects 20-byte entries of
+ * sha1_table to be in ascending hashcmp() order; presumably the v4
+ * table is stored sorted -- confirm.
+ */
+static void check_against_sha1table(const unsigned char *sha1)
+{
+	if (!packv4)
+		return;
+
+	/*
+	 * Use a real comparator rather than casting hashcmp(): calling a
+	 * function through an incompatible pointer type is undefined.
+	 */
+	if (!bsearch(sha1, sha1_table, nr_objects, 20, cmp_sha1_table_entry))
+		die(_("object %s not found in SHA-1 table"),
+		    sha1_to_hex(sha1));
+}
+
+/*
+ * Read a SHA-1 reference with read_sha1ref() and return it.  When the
+ * returned pointer does not point into sha1_table itself, the SHA-1 is
+ * additionally passed to check_against_sha1table(), which dies if a v4
+ * pack's table does not contain it.
+ */
+static const unsigned char *read_sha1table_ref(void)
+{
+	const unsigned char *ref = read_sha1ref();
+	const unsigned char *table_end = sha1_table + nr_objects * 20;
+
+	if (ref >= sha1_table && ref < table_end)
+		return ref;	/* already a table entry */
+	check_against_sha1table(ref);
+	return ref;
+}
+
 static const unsigned char *read_dictref(struct packv4_dict *dict)
 {
 	unsigned int index = read_varint();
@@ -327,6 +349,84 @@ static void write_object(unsigned nr, enum object_type type,
 	}
 }
 
+/*
+ * Rebuild a canonical tree object from its pack v4 encoding and hand it
+ * to write_object() as object number "nr_obj" (skipped under dry_run).
+ *
+ * "tree"/"tree_len" is the encoded entry stream: a varint entry count
+ * followed by items.  Each item starts with a varint whose low bit
+ * selects the kind:
+ *   - set: copy "copy_count >> 1" entries from the base tree, starting
+ *     at entry index "value >> 1"; if the low bit of copy_count is set,
+ *     a base tree reference follows and must equal "base_sha1";
+ *   - clear: a literal entry whose mode and name come from
+ *     path_dict entry "value >> 1" and whose SHA-1 reference follows.
+ * A SHA-1 reference is a varint: 0 means 20 raw bytes follow inline,
+ * anything else is a 1-based index into sha1_table.
+ *
+ * "base_sha1", "base" and "base_size" describe the base tree in
+ * canonical form; "base_sha1" must be non-NULL if the stream contains
+ * any copy item.
+ *
+ * Dies on malformed input.
+ *
+ * NOTE(review): indices into path_dict and sha1_table are not range
+ * checked here; presumably the caller has already validated the
+ * stream -- confirm.
+ */
+static void resolve_tree_v4(unsigned long nr_obj,
+			    const void *tree,
+			    unsigned long tree_len,
+			    const unsigned char *base_sha1,
+			    const void *base,
+			    unsigned long base_size)
+{
+	int nr;
+	struct strbuf sb = STRBUF_INIT;
+	const unsigned char *p = tree;
+	const unsigned char *end = p + tree_len;
+
+	nr = decode_varint(&p);
+	while (nr > 0 && p < end) {
+		unsigned int copy_start_or_path = decode_varint(&p);
+		if (copy_start_or_path & 1) { /* copy_start */
+			struct tree_desc desc;
+			struct name_entry entry;
+			unsigned int copy_count = decode_varint(&p);
+			unsigned int copy_start = copy_start_or_path >> 1;
+			if (!base_sha1)
+				die("we are not supposed to copy from another tree!");
+			if (copy_count & 1) { /* first delta */
+				unsigned int id = decode_varint(&p);
+				const unsigned char *last_base;
+				if (!id) {
+					last_base = p;
+					p += 20;
+				} else
+					last_base = sha1_table + (id - 1) * 20;
+				if (hashcmp(last_base, base_sha1))
+					die("bad base tree in resolve_tree_v4");
+			}
+
+			copy_count >>= 1;
+			nr -= copy_count;
+
+			/* skip copy_start entries, then copy copy_count of them */
+			init_tree_desc(&desc, base, base_size);
+			while (tree_entry(&desc, &entry)) {
+				if (copy_start)
+					copy_start--;
+				else if (copy_count) {
+					strbuf_addf(&sb, "%o %s%c",
+						    entry.mode, entry.path, '\0');
+					strbuf_add(&sb, entry.sha1, 20);
+					copy_count--;
+				} else
+					break;
+			}
+			/*
+			 * The base ran out before the requested range was
+			 * copied.  "nr" has already been reduced by the full
+			 * count, so without this check a truncated tree
+			 * would be written silently.
+			 */
+			if (copy_count)
+				die("copy range exceeds base tree in resolve_tree_v4");
+		} else {	/* path */
+			unsigned int path_idx = copy_start_or_path >> 1;
+			const unsigned char *path;
+			unsigned mode;
+			unsigned int id;
+			const unsigned char *entry_sha1;
+
+			id = decode_varint(&p);
+			if (!id) {
+				entry_sha1 = p;
+				p += 20;
+			} else
+				entry_sha1 = sha1_table + (id - 1) * 20;
+			nr--;
+
+			/* dictionary entry: 2-byte big-endian mode, then the name */
+			path = path_dict->data + path_dict->offsets[path_idx];
+			mode = (path[0] << 8) | path[1];
+			strbuf_addf(&sb, "%o %s%c", mode, path+2, '\0');
+			strbuf_add(&sb, entry_sha1, 20);
+		}
+	}
+	/* every announced entry must be produced and every byte consumed */
+	if (nr != 0 || p != end)
+		die(_("bad delta tree"));
+	/*
+	 * NOTE(review): sb.buf is passed on without strbuf_release();
+	 * presumably write_object() takes ownership of it -- confirm.
+	 */
+	if (!dry_run)
+		write_object(nr_obj, OBJ_TREE, sb.buf, sb.len);
+	else
+		strbuf_release(&sb);
+}
+
 static void resolve_delta(unsigned nr, enum object_type type,
 			  void *base, unsigned long base_size,
 			  void *delta, unsigned long delta_size)
@@ -358,8 +458,13 @@ static void added_object(unsigned nr, enum object_type type,
 		    info->base_offset == obj_list[nr].offset) {
 			*p = info->next;
 			p = &delta_list;
-			resolve_delta(info->nr, type, data, size,
-				      info->delta, info->size);
+			if (type == OBJ_TREE && packv4)
+				resolve_tree_v4(info->nr, info->delta,
+						info->size, info->base_sha1,
+						data, size);
+			else
+				resolve_delta(info->nr, type, data, size,
+					      info->delta, info->size);
 			free(info);
 			continue;
 		}
@@ -493,6 +598,85 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	free(base);
 }
 
+/*
+ * Try to resolve v4 tree "nr" against a base tree that is found with
+ * lookup_object() and whose contents are available through
+ * lookup_object_buffer().
+ *
+ * Returns 1 after calling resolve_tree_v4(), or 0 when "base" is not
+ * a known tree object or has no buffer attached.
+ */
+static int resolve_tree_against_held(unsigned nr, const unsigned char *base,
+				     void *delta_data, unsigned long delta_size)
+{
+	struct obj_buffer *held;
+	struct object *base_obj = lookup_object(base);
+
+	if (base_obj && base_obj->type == OBJ_TREE) {
+		held = lookup_object_buffer(base_obj);
+		if (held) {
+			resolve_tree_v4(nr, delta_data, delta_size, base,
+					held->buffer, held->size);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Unpack one OBJ_PV4_TREE entry whose object number is "nr_obj".
+ * "size" is currently unused.
+ *
+ * The encoded tree is first read from the input to validate it and to
+ * find its base tree (at most one distinct base is supported).  The
+ * bytes read in that pass end up in back_buffer -- presumably
+ * copy_back_buffer(1)/(0) start and stop that capture; confirm against
+ * its definition.
+ *
+ * The captured stream is then resolved with resolve_tree_v4():
+ *   - with no base at all when the tree copies nothing;
+ *   - against the base read from the object store, if it is there;
+ *   - against a base still held in memory, if there is one;
+ *   - otherwise it is queued with add_delta_to_list() until the base
+ *     becomes available.
+ */
+static void unpack_tree_v4(unsigned long size, unsigned long nr_obj)
+{
+	unsigned int nr;
+	const unsigned char *last_base = NULL;
+
+	copy_back_buffer(1);
+	strbuf_reset(&back_buffer);
+	nr = read_varint();
+	while (nr) {
+		unsigned int copy_start_or_path = read_varint();
+		if (copy_start_or_path & 1) { /* copy_start */
+			unsigned int copy_count = read_varint();
+			if (copy_count & 1) { /* first delta */
+				const unsigned char *old_base = last_base;
+				last_base = read_sha1table_ref();
+				if (old_base && hashcmp(last_base, old_base))
+					die("multi-base trees are not supported");
+			} else if (!last_base)
+				die("missing delta base unpack_tree_v4 at %lu",
+				    (unsigned long)consumed_bytes);
+			copy_count >>= 1;
+			if (!copy_count || copy_count > nr)
+				die("bad copy count index in unpack_tree_v4 at %lu",
+				    (unsigned long)consumed_bytes);
+			nr -= copy_count;
+		} else {	/* path */
+			unsigned int path_idx = copy_start_or_path >> 1;
+			if (path_idx >= path_dict->nb_entries)
+				die("bad path index in unpack_tree_v4 at %lu",
+				    (unsigned long)consumed_bytes);
+			read_sha1ref();
+			nr--;
+		}
+	}
+	copy_back_buffer(0);
+
+	if (last_base) {
+		if (has_sha1_file(last_base)) {
+			enum object_type type;
+			unsigned long base_size;
+			void *base = read_sha1_file(last_base, &type, &base_size);
+			/* "type" is only meaningful if the read succeeded */
+			if (!base)
+				die("failed to read base tree %s",
+				    sha1_to_hex(last_base));
+			if (type != OBJ_TREE)
+				die("base tree %s is not a tree", sha1_to_hex(last_base));
+			resolve_tree_v4(nr_obj, back_buffer.buf, back_buffer.len,
+					last_base, base, base_size);
+			free(base);
+		} else if (resolve_tree_against_held(nr_obj, last_base,
+						     back_buffer.buf, back_buffer.len))
+			   ; /* resolved */
+		else {
+			unsigned long delta_size = back_buffer.len;
+			char *delta = strbuf_detach(&back_buffer, NULL);
+			/*
+			 * cannot resolve yet --- queue it
+			 *
+			 * Use the object number here: "nr" is the entry
+			 * counter and is always 0 once the loop above ends.
+			 */
+			hashcpy(obj_list[nr_obj].sha1, null_sha1);
+			add_delta_to_list(nr_obj, last_base, 0, delta, delta_size);
+		}
+	} else
+		resolve_tree_v4(nr_obj, back_buffer.buf, back_buffer.len, NULL, NULL, 0);
+	strbuf_release(&back_buffer);
+}
+
 static void unpack_commit_v4(unsigned long size, unsigned long nr)
 {
 	unsigned int nb_parents;
@@ -588,6 +772,9 @@ static int unpack_one(unsigned nr)
 	case OBJ_PV4_COMMIT:
 		unpack_commit_v4(size, nr);
 		break;
+	case OBJ_PV4_TREE:
+		unpack_tree_v4(size, nr);
+		break;
 	default:
 		error("bad object type %d", type);
 		has_errors = 1;
-- 
1.8.2.82.gc24b958

  parent reply	other threads:[~2013-09-11  6:09 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-09 19:52 [PULL REQUEST] initial pack v4 support Nicolas Pitre
2013-09-09 22:28 ` Junio C Hamano
2013-09-10 21:21 ` Junio C Hamano
2013-09-10 21:32   ` Nicolas Pitre
2013-09-10 21:52     ` Junio C Hamano
2013-09-10 22:31   ` Nicolas Pitre
2013-09-11  6:06   ` [PATCH 00/21] np/pack-v4 updates Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 01/21] fixup! pack-objects: prepare SHA-1 table in v4 Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 02/21] fixup! pack-objects: support writing pack v4 Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 03/21] fixup! pack v4: support "end-of-pack" indicator in index-pack and pack-objects Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 04/21] fixup! index-pack: parse v4 header and dictionaries Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 05/21] fixup! index-pack: record all delta bases in v4 (tree and ref-delta) Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 06/21] pack v4: lift dict size check in load_dict() Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 07/21] pack v4: move pv4 objhdr parsing code to packv4-parse.c Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 08/21] pack-objects: respect compression level in v4 Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 09/21] pack-objects: recognize v4 as pack source Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 10/21] pack v4: add a note that streaming does not support OBJ_PV4_* Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 11/21] unpack-objects: report missing object name Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 12/21] unpack-objects: recognize end-of-pack in v4 thin pack Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 13/21] unpack-objects: read v4 dictionaries Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 14/21] unpack-objects: decode v4 object header Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 15/21] unpack-objects: decode v4 ref-delta Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 16/21] unpack-objects: decode v4 commits Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` [PATCH 17/21] unpack-objects: allow to save processed bytes to a buffer Nguyễn Thái Ngọc Duy
2013-09-11  6:06     ` Nguyễn Thái Ngọc Duy [this message]
2013-09-11  6:06     ` [PATCH 19/21] index-pack, pack-objects: allow creating .idx v2 with .pack v4 Nguyễn Thái Ngọc Duy
2013-09-11 15:48       ` Nicolas Pitre
2013-09-11  6:06     ` [PATCH 20/21] show-index: acknowledge that it does not read .idx v3 Nguyễn Thái Ngọc Duy
2013-09-11 16:19       ` Nicolas Pitre
2013-09-11  6:06     ` [PATCH 21/21] t1050, t5500: replace the use of "show-index|wc -l" with verify-pack Nguyễn Thái Ngọc Duy
2013-09-11 14:21     ` [PATCH 00/21] np/pack-v4 updates Duy Nguyen
2013-09-11 16:25       ` Nicolas Pitre
2013-09-12  3:38         ` Duy Nguyen
2013-09-12 16:20           ` Nicolas Pitre
2013-09-13  1:11             ` Duy Nguyen
2013-09-11 16:24     ` Nicolas Pitre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1378879582-15372-19-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=nico@fluxnic.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.