From: Nicolas Pitre <nico@fluxnic.net>
To: git@vger.kernel.org
Subject: [PATCH 17/38] pack v4: tree object delta encoding
Date: Thu, 05 Sep 2013 02:19:40 -0400 [thread overview]
Message-ID: <1378362001-1738-18-git-send-email-nico@fluxnic.net> (raw)
In-Reply-To: <1378362001-1738-1-git-send-email-nico@fluxnic.net>
In order to be able to quickly walk tree objects, let's encode their
"delta" as a range of entries into another tree object.
In order to discriminate between a copy sequence from a regular entry,
the entry index LSB is reserved to indicate a copy sequence. Therefore
the actual index of a path component is shifted left one bit.
The encoding allows for the base object to change so multiple base
objects can be borrowed from. The code doesn't try to exploit this
possibility at the moment though.
The code isn't optimal at the moment as it doesn't consider the case
where a copy sequence could be larger than the local sequence it
means to replace.
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
packv4-create.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 103 insertions(+), 5 deletions(-)
diff --git a/packv4-create.c b/packv4-create.c
index 5d76234..6830a0a 100644
--- a/packv4-create.c
+++ b/packv4-create.c
@@ -394,24 +394,53 @@ bad:
return NULL;
}
+static int compare_tree_entries(struct name_entry *e1, struct name_entry *e2)
+{
+ int len1 = tree_entry_len(e1);
+ int len2 = tree_entry_len(e2);
+ int len = len1 < len2 ? len1 : len2;
+ unsigned char c1, c2;
+ int cmp;
+
+ cmp = memcmp(e1->path, e2->path, len);
+ if (cmp)
+ return cmp;
+ c1 = e1->path[len];
+ c2 = e2->path[len];
+ if (!c1 && S_ISDIR(e1->mode))
+ c1 = '/';
+ if (!c2 && S_ISDIR(e2->mode))
+ c2 = '/';
+ return c1 - c2;
+}
+
/*
* This converts a canonical tree object buffer into its
* tightly packed representation using the already populated
* and sorted tree_path_table dictionary. The parsing is
* strict so to ensure the canonical version may always be
* regenerated and produce the same hash.
+ *
+ * If a delta buffer is provided, we may encode multiple ranges of tree
+ * entries against that buffer.
*/
-void *pv4_encode_tree(void *_buffer, unsigned long *sizep)
+void *pv4_encode_tree(void *_buffer, unsigned long *sizep,
+ void *delta, unsigned long delta_size,
+ const unsigned char *delta_sha1)
{
unsigned long size = *sizep;
unsigned char *in, *out, *end, *buffer = _buffer;
- struct tree_desc desc;
- struct name_entry name_entry;
+ struct tree_desc desc, delta_desc;
+ struct name_entry name_entry, delta_entry;
int nb_entries;
+ unsigned int copy_start, copy_count = 0, delta_pos = 0, first_delta = 1;
if (!size)
return NULL;
+ if (!delta_size)
+ delta = NULL;
+
/*
* We can't make sure the result will always be smaller than the
* input. The smallest possible entry is "0 x\0<40 byte SHA1>"
@@ -434,9 +463,42 @@ void *pv4_encode_tree(void *_buffer, unsigned long *sizep)
out += encode_varint(nb_entries, out);
init_tree_desc(&desc, in, size);
+ if (delta) {
+ init_tree_desc(&delta_desc, delta, delta_size);
+ if (!tree_entry(&delta_desc, &delta_entry))
+ delta = NULL;
+ }
+
while (tree_entry(&desc, &name_entry)) {
int pathlen, index;
+ /*
+ * Try to match entries against our delta object.
+ */
+ if (delta) {
+ int ret;
+
+ do {
+ ret = compare_tree_entries(&name_entry, &delta_entry);
+ if (ret <= 0 || copy_count != 0)
+ break;
+ delta_pos++;
+ if (!tree_entry(&delta_desc, &delta_entry))
+ delta = NULL;
+ } while (delta);
+
+ if (ret == 0 && name_entry.mode == delta_entry.mode &&
+ hashcmp(name_entry.sha1, delta_entry.sha1) == 0) {
+ if (!copy_count)
+ copy_start = delta_pos;
+ copy_count++;
+ delta_pos++;
+ if (!tree_entry(&delta_desc, &delta_entry))
+ delta = NULL;
+ continue;
+ }
+ }
+
if (end - out < 48) {
unsigned long sofar = out - buffer;
buffer = xrealloc(buffer, (sofar + 48)*2);
@@ -444,6 +506,32 @@ void *pv4_encode_tree(void *_buffer, unsigned long *sizep)
out = buffer + sofar;
}
+ if (copy_count) {
+ /*
+ * Let's write a sequence indicating we're copying
+ * entries from another object:
+ *
+ * entry_start + entry_count + object_ref
+ *
+ * To distinguish between 'entry_start' and an actual
+ * entry index, we use the LSB = 1.
+ *
+ * Furthermore, if object_ref is the same as the
+ * preceding one, we can omit it and save some
+ * more space, especially if that ends up being a
+ * full sha1 reference. Let's steal the LSB
+ * of entry_count for that purpose.
+ */
+ copy_start = (copy_start << 1) | 1;
+ copy_count = (copy_count << 1) | first_delta;
+ out += encode_varint(copy_start, out);
+ out += encode_varint(copy_count, out);
+ if (first_delta)
+ out += encode_sha1ref(delta_sha1, out);
+ copy_count = 0;
+ first_delta = 0;
+ }
+
pathlen = tree_entry_len(&name_entry);
index = dict_add_entry(tree_path_table, name_entry.mode,
name_entry.path, pathlen);
@@ -452,10 +540,20 @@ void *pv4_encode_tree(void *_buffer, unsigned long *sizep)
free(buffer);
return NULL;
}
- out += encode_varint(index, out);
+ out += encode_varint(index << 1, out);
out += encode_sha1ref(name_entry.sha1, out);
}
+ if (copy_count) {
+ /* flush the trailing copy */
+ copy_start = (copy_start << 1) | 1;
+ copy_count = (copy_count << 1) | first_delta;
+ out += encode_varint(copy_start, out);
+ out += encode_varint(copy_count, out);
+ if (first_delta)
+ out += encode_sha1ref(delta_sha1, out);
+ }
+
*sizep = out - buffer;
return buffer;
}
@@ -761,7 +859,7 @@ static off_t packv4_write_object(struct sha1file *f, struct packed_git *p,
result = pv4_encode_commit(src, &size);
break;
case OBJ_TREE:
- result = pv4_encode_tree(src, &size);
+ result = pv4_encode_tree(src, &size, NULL, 0, NULL);
break;
default:
die("unexpected object type %d", type);
--
1.8.4.38.g317e65b
next prev parent reply other threads:[~2013-09-05 6:21 UTC|newest]
Thread overview: 124+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-05 6:19 [PATCH 00/38] pack version 4 basic functionalities Nicolas Pitre
2013-09-05 6:19 ` [PATCH 01/38] pack v4: initial pack dictionary structure and code Nicolas Pitre
2013-09-05 6:19 ` [PATCH 02/38] export packed_object_info() Nicolas Pitre
2013-09-05 6:19 ` [PATCH 03/38] pack v4: scan tree objects Nicolas Pitre
2013-09-05 6:19 ` [PATCH 04/38] pack v4: add tree entry mode support to dictionary entries Nicolas Pitre
2013-09-05 6:19 ` [PATCH 05/38] pack v4: add commit object parsing Nicolas Pitre
2013-09-05 10:30 ` SZEDER Gábor
2013-09-05 17:30 ` Nicolas Pitre
2013-09-05 6:19 ` [PATCH 06/38] pack v4: split the object list and dictionary creation Nicolas Pitre
2013-09-05 6:19 ` [PATCH 07/38] pack v4: move to struct pack_idx_entry and get rid of our own struct idx_entry Nicolas Pitre
2013-09-05 6:19 ` [PATCH 08/38] pack v4: basic SHA1 reference encoding Nicolas Pitre
2013-09-05 6:19 ` [PATCH 09/38] introduce get_sha1_lowhex() Nicolas Pitre
2013-09-05 6:19 ` [PATCH 10/38] pack v4: commit object encoding Nicolas Pitre
2013-09-06 6:57 ` Junio C Hamano
2013-09-06 21:28 ` Nicolas Pitre
2013-09-06 22:08 ` Junio C Hamano
2013-09-07 4:41 ` Nicolas Pitre
2013-09-05 6:19 ` [PATCH 11/38] pack v4: tree " Nicolas Pitre
2013-09-05 6:19 ` [PATCH 12/38] pack v4: dictionary table output Nicolas Pitre
2013-09-05 6:19 ` [PATCH 13/38] pack v4: creation code Nicolas Pitre
2013-09-05 6:19 ` [PATCH 14/38] pack v4: object headers Nicolas Pitre
2013-09-05 6:19 ` [PATCH 15/38] pack v4: object data copy Nicolas Pitre
2013-09-05 6:19 ` [PATCH 16/38] pack v4: object writing Nicolas Pitre
2013-09-05 6:19 ` Nicolas Pitre [this message]
2013-09-05 6:19 ` [PATCH 18/38] pack v4: load delta candidate for encoding tree objects Nicolas Pitre
2013-09-05 6:19 ` [PATCH 19/38] packv4-create: optimize delta encoding Nicolas Pitre
2013-09-05 6:19 ` [PATCH 20/38] pack v4: honor pack.compression config option Nicolas Pitre
2013-09-05 6:19 ` [PATCH 21/38] pack v4: relax commit parsing a bit Nicolas Pitre
2013-09-05 6:19 ` [PATCH 22/38] pack index v3 Nicolas Pitre
2013-09-05 6:19 ` [PATCH 23/38] packv4-create: normalize pack name to properly generate the pack index file name Nicolas Pitre
2013-09-05 6:19 ` [PATCH 24/38] packv4-create: add progress display Nicolas Pitre
2013-09-05 6:19 ` [PATCH 25/38] pack v4: initial pack index v3 support on the read side Nicolas Pitre
2013-09-05 6:19 ` [PATCH 26/38] pack v4: object header decode Nicolas Pitre
2013-09-05 6:19 ` [PATCH 27/38] pack v4: code to obtain a SHA1 from a sha1ref Nicolas Pitre
2013-09-05 6:19 ` [PATCH 28/38] pack v4: code to load and prepare a pack dictionary table for use Nicolas Pitre
2013-09-05 6:19 ` [PATCH 29/38] pack v4: code to retrieve a name Nicolas Pitre
2013-09-05 6:19 ` [PATCH 30/38] pack v4: code to recreate a canonical commit object Nicolas Pitre
2013-09-05 6:19 ` [PATCH 31/38] sha1_file.c: make use of decode_varint() Nicolas Pitre
2013-09-05 7:35 ` SZEDER Gábor
2013-09-05 6:19 ` [PATCH 32/38] pack v4: parse delta base reference Nicolas Pitre
2013-09-05 6:19 ` [PATCH 33/38] pack v4: we can read commit objects now Nicolas Pitre
2013-09-05 6:19 ` [PATCH 34/38] pack v4: code to retrieve a path component Nicolas Pitre
2013-09-05 6:19 ` [PATCH 35/38] pack v4: decode tree objects Nicolas Pitre
2013-09-05 6:19 ` [PATCH 36/38] pack v4: get " Nicolas Pitre
2013-09-05 6:20 ` [PATCH 37/38] pack v4: introduce "escape hatches" in the name and path indexes Nicolas Pitre
2013-09-05 19:02 ` Nicolas Pitre
2013-09-05 21:48 ` Nicolas Pitre
2013-09-05 23:57 ` Duy Nguyen
2013-09-05 6:20 ` [PATCH 38/38] packv4-create: add a command line argument to limit tree copy sequences Nicolas Pitre
2013-09-07 10:43 ` [PATCH 00/12] pack v4 support in index-pack Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 01/12] pack v4: split pv4_create_dict() out of load_dict() Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 02/12] index-pack: split out varint decoding code Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 03/12] index-pack: do not allocate buffer for unpacking deltas in the first pass Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 04/12] index-pack: split inflate/digest code out of unpack_entry_data Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 05/12] index-pack: parse v4 header and dictionaries Nguyễn Thái Ngọc Duy
2013-09-08 2:14 ` Nicolas Pitre
2013-09-07 10:43 ` [PATCH 06/12] index-pack: make sure all objects are registered in v4's SHA-1 table Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 07/12] index-pack: parse v4 commit format Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 08/12] index-pack: parse v4 tree format Nguyễn Thái Ngọc Duy
2013-09-08 2:52 ` Nicolas Pitre
2013-09-07 10:43 ` [PATCH 09/12] index-pack: move delta base queuing code to unpack_raw_entry Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 10/12] index-pack: record all delta bases in v4 (tree and ref-delta) Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 11/12] index-pack: skip looking for ofs-deltas in v4 as they are not allowed Nguyễn Thái Ngọc Duy
2013-09-07 10:43 ` [PATCH 12/12] index-pack: resolve v4 one-base trees Nguyễn Thái Ngọc Duy
2013-09-08 3:28 ` Nicolas Pitre
2013-09-08 3:44 ` Duy Nguyen
2013-09-08 7:22 ` [PATCH v2 00/14] pack v4 support in index-pack Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 01/14] pack v4: split pv4_create_dict() out of load_dict() Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 02/14] pack v4: add pv4_free_dict() Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 03/14] index-pack: add more comments on some big functions Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 04/14] index-pack: split out varint decoding code Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 05/14] index-pack: do not allocate buffer for unpacking deltas in the first pass Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 06/14] index-pack: split inflate/digest code out of unpack_entry_data Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 07/14] index-pack: parse v4 header and dictionaries Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 08/14] index-pack: make sure all objects are registered in v4's SHA-1 table Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 09/14] index-pack: parse v4 commit format Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 10/14] index-pack: parse v4 tree format Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 11/14] index-pack: move delta base queuing code to unpack_raw_entry Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 12/14] index-pack: record all delta bases in v4 (tree and ref-delta) Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 13/14] index-pack: skip looking for ofs-deltas in v4 as they are not allowed Nguyễn Thái Ngọc Duy
2013-09-08 7:22 ` [PATCH v2 14/14] index-pack: resolve v4 one-base trees Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 00/11] pack v4 support in pack-objects Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 01/11] pack v4: allocate dicts from the beginning Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 02/11] pack v4: stop using static/global variables in packv4-create.c Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 03/11] pack v4: move packv4-create.c to libgit.a Nguyễn Thái Ngọc Duy
2013-09-08 20:56 ` Nicolas Pitre
2013-09-08 15:04 ` [PATCH 04/11] pack v4: add version argument to write_pack_header Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 05/11] pack-write.c: add pv4_encode_in_pack_object_header Nguyễn Thái Ngọc Duy
2013-09-08 20:51 ` Nicolas Pitre
2013-09-08 15:04 ` [PATCH 06/11] pack-objects: add --version to specify written pack version Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 07/11] list-objects.c: add show_tree_entry callback to traverse_commit_list Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 08/11] pack-objects: create pack v4 tables Nguyễn Thái Ngọc Duy
2013-09-09 10:40 ` Duy Nguyen
2013-09-09 13:07 ` Nicolas Pitre
2013-09-09 15:21 ` Junio C Hamano
2013-09-08 15:04 ` [PATCH 09/11] pack-objects: do not cache delta for v4 trees Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 10/11] pack-objects: exclude commits out of delta objects in v4 Nguyễn Thái Ngọc Duy
2013-09-08 15:04 ` [PATCH 11/11] pack-objects: support writing pack v4 Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 00/16] pack v4 support in pack-objects Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 01/16] pack v4: allocate dicts from the beginning Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 02/16] pack v4: stop using static/global variables in packv4-create.c Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 03/16] pack v4: move packv4-create.c to libgit.a Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 04/16] pack v4: add version argument to write_pack_header Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 05/16] pack_write: tighten valid object type check in encode_in_pack_object_header Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 06/16] pack-write.c: add pv4_encode_object_header Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 07/16] pack-objects: add --version to specify written pack version Nguyễn Thái Ngọc Duy
2013-09-09 13:57 ` [PATCH v2 08/16] list-objects.c: add show_tree_entry callback to traverse_commit_list Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 09/16] pack-objects: do not cache delta for v4 trees Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 10/16] pack-objects: exclude commits out of delta objects in v4 Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 11/16] pack-objects: create pack v4 tables Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 12/16] pack-objects: prepare SHA-1 table in v4 Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 13/16] pack-objects: support writing pack v4 Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 14/16] pack v4: support "end-of-pack" indicator in index-pack and pack-objects Nguyễn Thái Ngọc Duy
2013-09-09 13:58 ` [PATCH v2 15/16] index-pack: use nr_objects_final as sha1_table size Nguyễn Thái Ngọc Duy
2013-09-09 15:01 ` Nicolas Pitre
2013-09-09 18:34 ` Junio C Hamano
2013-09-09 18:46 ` Nicolas Pitre
2013-09-09 18:56 ` Junio C Hamano
2013-09-09 19:11 ` Nicolas Pitre
2013-09-09 19:30 ` Junio C Hamano
2013-09-09 19:56 ` Nicolas Pitre
2013-09-10 0:45 ` Duy Nguyen
2013-09-12 15:34 ` Nicolas Pitre
2013-09-09 13:58 ` [PATCH v2 16/16] index-pack: support completing thin packs v4 Nguyễn Thái Ngọc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1378362001-1738-18-git-send-email-nico@fluxnic.net \
--to=nico@fluxnic.net \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).