git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nicolas Pitre" <nico@fluxnic.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/4] pack v4: cache flattened v4 trees in delta base cache
Date: Thu, 12 Sep 2013 17:38:03 +0700	[thread overview]
Message-ID: <1378982284-7848-3-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1378982284-7848-1-git-send-email-pclouds@gmail.com>


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 The memmove in pv4_get_tree() may look inefficient. I added a
 heuristic to avoid moving if nb_entries takes 2 bytes (most common,
 I think), but it does not improve much. So memmove() is probably ok.

 packv4-parse.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 packv4-parse.h |  3 ++-
 sha1_file.c    |  8 +++++++-
 3 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/packv4-parse.c b/packv4-parse.c
index ae3e6a5..5002f42 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -406,7 +406,10 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size,
 
 static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			  off_t offset, unsigned int start, unsigned int count,
-			  unsigned char **dstp, unsigned long *sizep, int hdr)
+			  unsigned char **dstp, unsigned long *sizep,
+			  unsigned char **v4_dstp, unsigned long *v4_sizep,
+			  unsigned int *v4_entries,
+			  int hdr)
 {
 	unsigned long avail;
 	unsigned int nb_entries;
@@ -422,10 +425,18 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			if (++scp - src >= avail - 20)
 				return -1;
 		/* is this a canonical tree object? */
-		if ((*scp & 0xf) == OBJ_TREE)
+		if ((*scp & 0xf) == OBJ_TREE) {
+			/*
+			 * we could try to convert to v4 tree before
+			 * giving up, provided that the number of
+			 * inconvertible trees is small. But that's
+			 * for later.
+			 */
+			*v4_dstp = NULL;
 			return copy_canonical_tree_entries(p, offset,
 							   start, count,
 							   dstp, sizep);
+		}
 		/* let's still make sure this is actually a pv4 tree */
 		if ((*scp++ & 0xf) != OBJ_PV4_TREE)
 			return -1;
@@ -484,6 +495,16 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			*dstp += len + 20;
 			*sizep -= len + 20;
 			count--;
+			if (*v4_dstp) {
+				if (scp - src > *v4_sizep)
+					*v4_dstp = NULL;
+				else {
+					memcpy(*v4_dstp, src, scp - src);
+					*v4_dstp += scp - src;
+					*v4_sizep -= scp - src;
+					(*v4_entries)++;
+				}
+			}
 		} else if (what & 1) {
 			/*
 			 * Copy from another tree object.
@@ -537,7 +558,8 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 				count -= copy_count;
 				ret = decode_entries(p, w_curs,
 					copy_objoffset, copy_start, copy_count,
-					dstp, sizep, 1);
+					dstp, sizep, v4_dstp, v4_sizep,
+					v4_entries, 1);
 				if (ret)
 					return ret;
 				/* force pack window readjustment */
@@ -554,11 +576,13 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 }
 
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size)
+		   off_t offset, unsigned long size,
+		   void **v4_data, unsigned long *v4_size)
 {
-	unsigned long avail;
-	unsigned int nb_entries;
+	unsigned long avail, v4_max_size;
+	unsigned int nb_entries, v4_entries;
 	unsigned char *dst, *dcp;
+	unsigned char *v4_dst, *v4_dcp;
 	const unsigned char *src, *scp;
 	int ret;
 
@@ -570,11 +594,33 @@ void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
 
 	dst = xmallocz(size);
 	dcp = dst;
-	ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0);
+	if (v4_data) {
+		/*
+		 * v4 can't be larger than canonical, so "size" should
+		 * be enough
+		 */
+		v4_max_size = size;
+		v4_dst = v4_dcp = xmallocz(v4_max_size);
+		v4_entries = 0;
+	}
+	ret = decode_entries(p, w_curs, offset, 0, nb_entries,
+			     &dcp, &size,
+			     v4_data ? &v4_dcp : NULL, &v4_max_size,
+			     &v4_entries, 0);
 	if (ret < 0 || size != 0) {
 		free(dst);
+		free(v4_dst);
 		return NULL;
 	}
+	if (v4_data && v4_dcp) {
+		unsigned char hdr[10];
+		int len = encode_varint(v4_entries, hdr);
+		memmove(v4_dst + len, v4_dst, v4_dcp - v4_dst);
+		memcpy(v4_dst, hdr, len);
+		*v4_data = v4_dst;
+		*v4_size = len + v4_dcp - v4_dst;
+	} else
+		free(v4_dst);
 	return dst;
 }
 
diff --git a/packv4-parse.h b/packv4-parse.h
index d674a3f..647b73c 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -20,6 +20,7 @@ const unsigned char *get_sha1ref(struct packed_git *p,
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 		     off_t offset, unsigned long size);
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size);
+		   off_t offset, unsigned long size,
+		   void **v4_data, unsigned long *v4_size);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 03c66bb..b176316 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2103,6 +2103,8 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 	struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
 	int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
 	int base_from_cache = 0;
+	void *v4_data;
+	unsigned long v4_size;
 
 	if (log_pack_access != no_log_pack_access)
 		write_pack_access_log(p, obj_offset);
@@ -2181,7 +2183,11 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 		type -= 8;
 		break;
 	case OBJ_PV4_TREE:
-		data = pv4_get_tree(p, &w_curs, curpos, size);
+		v4_data = NULL;
+		data = pv4_get_tree(p, &w_curs, curpos, size, &v4_data, &v4_size);
+		if (v4_data)
+			add_delta_base_cache(p, obj_offset, v4_data,
+					     size, v4_size, type);
 		type -= 8;
 		break;
 	case OBJ_COMMIT:
-- 
1.8.2.83.gc99314b

  parent reply	other threads:[~2013-09-12 10:37 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
2013-09-13 13:27   ` Nicolas Pitre
2013-09-13 13:59     ` Duy Nguyen
2013-09-14  2:06       ` Nicolas Pitre
2013-09-14  4:22         ` Nicolas Pitre
2013-09-15  7:35           ` Duy Nguyen
2013-09-16  4:42             ` Nicolas Pitre
2013-09-16  5:24               ` Duy Nguyen
2013-09-12 10:38 ` Nguyễn Thái Ngọc Duy [this message]
2013-09-12 10:38 ` [PATCH 4/4] pack v4: make use of cached v4 trees when unpacking Nguyễn Thái Ngọc Duy
2013-09-12 13:29 ` [PATCH 5/4] pack v4: convert v4 tree to canonical format if found in base cache Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1378982284-7848-3-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=nico@fluxnic.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).