All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nicolas Pitre" <nico@fluxnic.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/4] pack v4: cache flattened v4 trees in delta base cache
Date: Thu, 12 Sep 2013 17:38:03 +0700	[thread overview]
Message-ID: <1378982284-7848-3-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1378982284-7848-1-git-send-email-pclouds@gmail.com>


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 The memmove in pv4_get_tree() may look inefficient. I tried adding a
 heuristic to avoid the move when nb_entries takes 2 bytes (the most
 common case, I think), but it did not improve things much. So
 memmove() is probably ok.

 packv4-parse.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 packv4-parse.h |  3 ++-
 sha1_file.c    |  8 +++++++-
 3 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/packv4-parse.c b/packv4-parse.c
index ae3e6a5..5002f42 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -406,7 +406,10 @@ static int tree_entry_prefix(unsigned char *buf, unsigned long size,
 
 static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			  off_t offset, unsigned int start, unsigned int count,
-			  unsigned char **dstp, unsigned long *sizep, int hdr)
+			  unsigned char **dstp, unsigned long *sizep,
+			  unsigned char **v4_dstp, unsigned long *v4_sizep,
+			  unsigned int *v4_entries,
+			  int hdr)
 {
 	unsigned long avail;
 	unsigned int nb_entries;
@@ -422,10 +425,18 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			if (++scp - src >= avail - 20)
 				return -1;
 		/* is this a canonical tree object? */
-		if ((*scp & 0xf) == OBJ_TREE)
+		if ((*scp & 0xf) == OBJ_TREE) {
+			/*
+			 * we could try to convert to v4 tree before
+			 * giving up, provided that the number of
+			 * inconvertible trees is small. But that's
+			 * for later.
+			 */
+			*v4_dstp = NULL;
 			return copy_canonical_tree_entries(p, offset,
 							   start, count,
 							   dstp, sizep);
+		}
 		/* let's still make sure this is actually a pv4 tree */
 		if ((*scp++ & 0xf) != OBJ_PV4_TREE)
 			return -1;
@@ -484,6 +495,16 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 			*dstp += len + 20;
 			*sizep -= len + 20;
 			count--;
+			if (*v4_dstp) {
+				if (scp - src > *v4_sizep)
+					*v4_dstp = NULL;
+				else {
+					memcpy(*v4_dstp, src, scp - src);
+					*v4_dstp += scp - src;
+					*v4_sizep -= scp - src;
+					(*v4_entries)++;
+				}
+			}
 		} else if (what & 1) {
 			/*
 			 * Copy from another tree object.
@@ -537,7 +558,8 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 				count -= copy_count;
 				ret = decode_entries(p, w_curs,
 					copy_objoffset, copy_start, copy_count,
-					dstp, sizep, 1);
+					dstp, sizep, v4_dstp, v4_sizep,
+					v4_entries, 1);
 				if (ret)
 					return ret;
 				/* force pack window readjustment */
@@ -554,11 +576,13 @@ static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
 }
 
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size)
+		   off_t offset, unsigned long size,
+		   void **v4_data, unsigned long *v4_size)
 {
-	unsigned long avail;
-	unsigned int nb_entries;
+	unsigned long avail, v4_max_size;
+	unsigned int nb_entries, v4_entries;
 	unsigned char *dst, *dcp;
+	unsigned char *v4_dst, *v4_dcp;
 	const unsigned char *src, *scp;
 	int ret;
 
@@ -570,11 +594,33 @@ void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
 
 	dst = xmallocz(size);
 	dcp = dst;
-	ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 0);
+	if (v4_data) {
+		/*
+		 * v4 can't be larger than canonical, so "size" should
+		 * be enough
+		 */
+		v4_max_size = size;
+		v4_dst = v4_dcp = xmallocz(v4_max_size);
+		v4_entries = 0;
+	}
+	ret = decode_entries(p, w_curs, offset, 0, nb_entries,
+			     &dcp, &size,
+			     v4_data ? &v4_dcp : NULL, &v4_max_size,
+			     &v4_entries, 0);
 	if (ret < 0 || size != 0) {
 		free(dst);
+		free(v4_dst);
 		return NULL;
 	}
+	if (v4_data && v4_dcp) {
+		unsigned char hdr[10];
+		int len = encode_varint(v4_entries, hdr);
+		memmove(v4_dst + len, v4_dst, v4_dcp - v4_dst);
+		memcpy(v4_dst, hdr, len);
+		*v4_data = v4_dst;
+		*v4_size = len + v4_dcp - v4_dst;
+	} else
+		free(v4_dst);
 	return dst;
 }
 
diff --git a/packv4-parse.h b/packv4-parse.h
index d674a3f..647b73c 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -20,6 +20,7 @@ const unsigned char *get_sha1ref(struct packed_git *p,
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 		     off_t offset, unsigned long size);
 void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
-		   off_t offset, unsigned long size);
+		   off_t offset, unsigned long size,
+		   void **v4_data, unsigned long *v4_size);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 03c66bb..b176316 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2103,6 +2103,8 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 	struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
 	int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
 	int base_from_cache = 0;
+	void *v4_data;
+	unsigned long v4_size;
 
 	if (log_pack_access != no_log_pack_access)
 		write_pack_access_log(p, obj_offset);
@@ -2181,7 +2183,11 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 		type -= 8;
 		break;
 	case OBJ_PV4_TREE:
-		data = pv4_get_tree(p, &w_curs, curpos, size);
+		v4_data = NULL;
+		data = pv4_get_tree(p, &w_curs, curpos, size, &v4_data, &v4_size);
+		if (v4_data)
+			add_delta_base_cache(p, obj_offset, v4_data,
+					     size, v4_size, type);
 		type -= 8;
 		break;
 	case OBJ_COMMIT:
-- 
1.8.2.83.gc99314b

  parent reply	other threads:[~2013-09-12 10:37 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-12 10:38 [PATCH 1/4] pack v4: avoid strlen() in tree_entry_prefix Nguyễn Thái Ngọc Duy
2013-09-12 10:38 ` [PATCH 2/4] pack v4: add v4_size to struct delta_base_cache_entry Nguyễn Thái Ngọc Duy
2013-09-13 13:27   ` Nicolas Pitre
2013-09-13 13:59     ` Duy Nguyen
2013-09-14  2:06       ` Nicolas Pitre
2013-09-14  4:22         ` Nicolas Pitre
2013-09-15  7:35           ` Duy Nguyen
2013-09-16  4:42             ` Nicolas Pitre
2013-09-16  5:24               ` Duy Nguyen
2013-09-12 10:38 ` Nguyễn Thái Ngọc Duy [this message]
2013-09-12 10:38 ` [PATCH 4/4] pack v4: make use of cached v4 trees when unpacking Nguyễn Thái Ngọc Duy
2013-09-12 13:29 ` [PATCH 5/4] pack v4: convert v4 tree to canonical format if found in base cache Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1378982284-7848-3-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=nico@fluxnic.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.