git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff King <peff@peff.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org
Subject: Re: [PATCH 00/11] writing out a huge blob to working tree
Date: Wed, 18 May 2011 03:08:37 -0400	[thread overview]
Message-ID: <20110518070837.GC27482@sigill.intra.peff.net> (raw)
In-Reply-To: <20110518064158.GB27482@sigill.intra.peff.net>

On Wed, May 18, 2011 at 02:41:58AM -0400, Jeff King wrote:

> According to perf, though, it's not the increased writes; the slowdown
> is actually from create_pack_revindex, in this call chain:
> 
>  create_pack_revindex
>  find_pack_revindex
>  packed_object_info_detail
>  sha1_object_info_extended
>  istream_source
>  open_istream
>  streaming_write_entry

Part of the problem is that with the current code, all you care about is
"Is it loose, packed non-delta, or packed delta?". And
packed_object_info_detail will tell you not just whether it's deltafied,
but will go to a lot more work to make the revindex. One solution is to
let the cheap packed_object_info() report back on delta status, since
it's free there, and then we don't have to deal with the revindex at
all.

Of course, it may turn out that the extra information is useful if and
when open_istream_* actually gets implemented for delta-fied objects.

The patch below implements the cheap "is_delta" check. But it only
shaves off a half second (dropping us from 7s to 6.5s). Prior to your
patches, we were at 4.5 seconds. So there's still quite a bit of
slowdown to figure out.

diff --git a/cache.h b/cache.h
index 39e53c8..829c3a4 100644
--- a/cache.h
+++ b/cache.h
@@ -1026,6 +1026,7 @@ extern int unpack_object_header(struct packed_git *, struct pack_window **, off_
 struct object_info {
 	/* Request */
 	unsigned long *sizep;
+	int *is_deltap;
 	int want_deltainfo;
 
 	/* Response */
diff --git a/sha1_file.c b/sha1_file.c
index 15f1c05..35be909 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1481,7 +1481,7 @@ static off_t get_delta_base(struct packed_git *p,
 
 /* forward declaration for a mutually recursive function */
 static int packed_object_info(struct packed_git *p, off_t offset,
-			      unsigned long *sizep);
+			      unsigned long *sizep, int *is_delta);
 
 static int packed_delta_info(struct packed_git *p,
 			     struct pack_window **w_curs,
@@ -1495,7 +1495,7 @@ static int packed_delta_info(struct packed_git *p,
 	base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
 	if (!base_offset)
 		return OBJ_BAD;
-	type = packed_object_info(p, base_offset, NULL);
+	type = packed_object_info(p, base_offset, NULL, NULL);
 	if (type <= OBJ_NONE) {
 		struct revindex_entry *revidx;
 		const unsigned char *base_sha1;
@@ -1605,7 +1605,7 @@ int packed_object_info_detail(struct packed_git *p,
 }
 
 static int packed_object_info(struct packed_git *p, off_t obj_offset,
-			      unsigned long *sizep)
+			      unsigned long *sizep, int *is_delta)
 {
 	struct pack_window *w_curs = NULL;
 	unsigned long size;
@@ -1619,6 +1619,8 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
 	case OBJ_REF_DELTA:
 		type = packed_delta_info(p, &w_curs, curpos,
 					 type, obj_offset, sizep);
+		if (is_delta)
+			*is_delta = 1;
 		break;
 	case OBJ_COMMIT:
 	case OBJ_TREE:
@@ -1626,11 +1628,15 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
 	case OBJ_TAG:
 		if (sizep)
 			*sizep = size;
+		if (is_delta)
+			*is_delta = 0;
 		break;
 	default:
 		error("unknown object type %i at offset %"PRIuMAX" in %s",
 		      type, (uintmax_t)obj_offset, p->pack_name);
 		type = OBJ_BAD;
+		if (is_delta)
+			*is_delta = 0;
 	}
 	unuse_pack(&w_curs);
 	return type;
@@ -2130,7 +2136,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 	}
 
 	if (!oi->want_deltainfo) {
-		status = packed_object_info(e.p, e.offset, oi->sizep);
+		status = packed_object_info(e.p, e.offset, oi->sizep,
+					    oi->is_deltap);
 	} else {
 		unsigned long size, store_size;
 		unsigned int delta_chain_length;
diff --git a/streaming.c b/streaming.c
index 03c58b2..a2c0e84 100644
--- a/streaming.c
+++ b/streaming.c
@@ -84,10 +84,12 @@ static enum input_source istream_source(const unsigned char *sha1,
 					struct object_info *oi)
 {
 	unsigned long size;
+	int is_delta;
 	int status;
 
 	oi->sizep = &size;
-	oi->want_deltainfo = 1;
+	oi->is_deltap = &is_delta;
+	oi->want_deltainfo = 0;
 
 	status = sha1_object_info_extended(sha1, oi);
 	if (status < 0)
@@ -98,7 +100,7 @@ static enum input_source istream_source(const unsigned char *sha1,
 	case OI_LOOSE:
 		return loose;
 	case OI_PACKED:
-		if (!oi->u.packed.delta && big_file_threshold <= size)
+		if (!is_delta && big_file_threshold <= size)
 			return pack_non_delta;
 		/* fallthru */
 	default:

  reply	other threads:[~2011-05-18  7:08 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-16  0:30 [PATCH 00/11] writing out a huge blob to working tree Junio C Hamano
2011-05-16  0:30 ` [PATCH 01/11] packed_object_info_detail(): do not return a string Junio C Hamano
2011-05-17  0:45   ` Thiago Farina
2011-05-17  2:36     ` Junio C Hamano
2011-05-16  0:30 ` [PATCH 02/11] sha1_object_info_extended(): expose a bit more info Junio C Hamano
2011-05-16  0:30 ` [PATCH 03/11] sha1_object_info_extended(): hint about objects in delta-base cache Junio C Hamano
2011-05-16  0:40   ` Shawn Pearce
2011-05-16  0:30 ` [PATCH 04/11] unpack_object_header(): make it public Junio C Hamano
2011-05-16  0:30 ` [PATCH 05/11] write_entry(): separate two helper functions out Junio C Hamano
2011-05-16  0:30 ` [PATCH 06/11] streaming: a new API to read from the object store Junio C Hamano
2011-05-18  8:09   ` Jeff King
2011-05-19  1:52     ` Junio C Hamano
2011-05-16  0:30 ` [PATCH 07/11] streaming_write_entry(): use streaming API in write_entry() Junio C Hamano
2011-05-16  0:30 ` [PATCH 08/11] streaming_write_entry(): support files with holes Junio C Hamano
2011-05-16 10:53   ` Nguyen Thai Ngoc Duy
2011-05-16 14:39     ` Junio C Hamano
2011-05-17  1:18       ` Nguyen Thai Ngoc Duy
2011-05-17  5:23         ` Junio C Hamano
2011-05-16 13:03   ` Thiago Farina
2011-05-16  0:30 ` [PATCH 09/11] streaming: read non-delta incrementally from a pack Junio C Hamano
2011-05-16  0:58   ` Shawn Pearce
2011-05-16  5:00     ` Junio C Hamano
2011-05-16  0:30 ` [PATCH 10/11] sha1_file.c: expose helpers to read loose objects Junio C Hamano
2011-05-16  0:30 ` [PATCH 11/11] streaming: read loose objects incrementally Junio C Hamano
2011-05-16  0:47 ` [PATCH 00/11] writing out a huge blob to working tree Shawn Pearce
2011-05-18  6:41 ` Jeff King
2011-05-18  7:08   ` Jeff King [this message]
2011-05-18  7:50     ` Jeff King
2011-05-18 15:12       ` Junio C Hamano
2011-05-18  8:17 ` Jeff King
2011-05-19 21:33 ` [PATCH v2 " Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 01/11] packed_object_info_detail(): do not return a string Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 02/11] sha1_object_info_extended(): expose a bit more info Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 03/11] sha1_object_info_extended(): hint about objects in delta-base cache Junio C Hamano
2011-05-20 23:05     ` René Scharfe
2011-05-21  1:49       ` Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 04/11] unpack_object_header(): make it public Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 05/11] write_entry(): separate two helper functions out Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 06/11] streaming: a new API to read from the object store Junio C Hamano
2011-05-20 23:05     ` René Scharfe
2011-05-21  1:49       ` Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 07/11] streaming_write_entry(): use streaming API in write_entry() Junio C Hamano
2011-05-20 22:52     ` Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 08/11] streaming_write_entry(): support files with holes Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 09/11] streaming: read non-delta incrementally from a pack Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 10/11] sha1_file.c: expose helpers to read loose objects Junio C Hamano
2011-05-19 21:33   ` [PATCH v2 11/11] streaming: read loose objects incrementally Junio C Hamano
2011-05-19 21:44   ` [Not A PATCH v2 02/11] interdiff Junio C Hamano
2011-05-19 22:21   ` [PATCH v2 00/11] writing out a huge blob to working tree Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110518070837.GC27482@sigill.intra.peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).