All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] {fetch,receive}-pack: drop unpack-objects, delay loosing objects until the end
Date: Mon,  2 Sep 2013 10:05:07 +0700	[thread overview]
Message-ID: <1378091107-31682-1-git-send-email-pclouds@gmail.com> (raw)

Current code peaks into the transfered pack's header, if the number of
objects is under a limit, unpack-objects is called to handle the rest,
otherwise index-pack is. This patch makes fetch-pack use index-pack
unconditionally, then turn objects loose and remove the pack at the
end. unpack-objects is deprecated and may be removed in future.

There are a few reasons for this:

 - down to two code paths to maintain regarding pack reading
   (sha1_file.c and index-pack.c). When .pack v4 comes, we don't need
   to duplicate work in index-pack and unpack-objects.

 - the number of objects should not be the only indicator for
   unpacking. If there are a few large objects in the pack, the pack
   should be kept anyway. Keeping the pack lets us examine the whole
   pack later and make a better decision.

 - by going through index-pack first, then unpack, we pay extra cost
   for completing a thin pack into a full one. But compared to fetch's
   total time, it should not be noticeable because unpack-objects is
   only called when the pack contains a small number of objects.

 - unpack-objects does not support streaming large blobs. Granted
   force_object_loose(), which is used by this patch, does not either.
   But it'll be easier to do so because sha1_file.c interface supports
   streaming.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/receive-pack.c | 120 +++++++++++++------------------------------------
 cache.h                |   1 +
 fetch-pack.c           |  77 +++++++++++--------------------
 sha1_file.c            |  70 ++++++++++++++++++++++++++++-
 4 files changed, 128 insertions(+), 140 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index e3eb5fc..6eb4ffb 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -792,105 +792,49 @@ static struct command *read_head_info(void)
 	return commands;
 }
 
-static const char *parse_pack_header(struct pack_header *hdr)
-{
-	switch (read_pack_header(0, hdr)) {
-	case PH_ERROR_EOF:
-		return "eof before pack header was fully read";
-
-	case PH_ERROR_PACK_SIGNATURE:
-		return "protocol error (pack signature mismatch detected)";
-
-	case PH_ERROR_PROTOCOL:
-		return "protocol error (pack version unsupported)";
-
-	default:
-		return "unknown error in parse_pack_header";
-
-	case 0:
-		return NULL;
-	}
-}
-
 static const char *pack_lockfile;
 
 static const char *unpack(int err_fd)
 {
-	struct pack_header hdr;
-	const char *hdr_err;
-	char hdr_arg[38];
 	int fsck_objects = (receive_fsck_objects >= 0
 			    ? receive_fsck_objects
 			    : transfer_fsck_objects >= 0
 			    ? transfer_fsck_objects
 			    : 0);
 
-	hdr_err = parse_pack_header(&hdr);
-	if (hdr_err) {
-		if (err_fd > 0)
-			close(err_fd);
-		return hdr_err;
-	}
-	snprintf(hdr_arg, sizeof(hdr_arg),
-			"--pack_header=%"PRIu32",%"PRIu32,
-			ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries));
-
-	if (ntohl(hdr.hdr_entries) < unpack_limit) {
-		int code, i = 0;
-		struct child_process child;
-		const char *unpacker[5];
-		unpacker[i++] = "unpack-objects";
-		if (quiet)
-			unpacker[i++] = "-q";
-		if (fsck_objects)
-			unpacker[i++] = "--strict";
-		unpacker[i++] = hdr_arg;
-		unpacker[i++] = NULL;
-		memset(&child, 0, sizeof(child));
-		child.argv = unpacker;
-		child.no_stdout = 1;
-		child.err = err_fd;
-		child.git_cmd = 1;
-		code = run_command(&child);
-		if (!code)
-			return NULL;
-		return "unpack-objects abnormal exit";
-	} else {
-		const char *keeper[7];
-		int s, status, i = 0;
-		char keep_arg[256];
-		struct child_process ip;
-
-		s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid());
-		if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
-			strcpy(keep_arg + s, "localhost");
-
-		keeper[i++] = "index-pack";
-		keeper[i++] = "--stdin";
-		if (fsck_objects)
-			keeper[i++] = "--strict";
-		keeper[i++] = "--fix-thin";
-		keeper[i++] = hdr_arg;
-		keeper[i++] = keep_arg;
-		keeper[i++] = NULL;
-		memset(&ip, 0, sizeof(ip));
-		ip.argv = keeper;
-		ip.out = -1;
-		ip.err = err_fd;
-		ip.git_cmd = 1;
-		status = start_command(&ip);
-		if (status) {
-			return "index-pack fork failed";
-		}
-		pack_lockfile = index_pack_lockfile(ip.out);
-		close(ip.out);
-		status = finish_command(&ip);
-		if (!status) {
-			reprepare_packed_git();
-			return NULL;
-		}
+	const char *keeper[7];
+	int s, status, i = 0;
+	char keep_arg[256];
+	struct child_process ip;
+
+	s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid());
+	if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+		strcpy(keep_arg + s, "localhost");
+
+	keeper[i++] = "index-pack";
+	keeper[i++] = "--stdin";
+	if (fsck_objects)
+		keeper[i++] = "--strict";
+	keeper[i++] = "--fix-thin";
+	keeper[i++] = keep_arg;
+	keeper[i++] = NULL;
+	memset(&ip, 0, sizeof(ip));
+	ip.argv = keeper;
+	ip.out = -1;
+	ip.err = err_fd;
+	ip.git_cmd = 1;
+	status = start_command(&ip);
+	if (status) {
+		return "index-pack fork failed";
+	}
+	pack_lockfile = index_pack_lockfile(ip.out);
+	close(ip.out);
+	status = finish_command(&ip);
+	if (status)
 		return "index-pack abnormal exit";
-	}
+	if (!maybe_unpack_objects(pack_lockfile, unpack_limit))
+		pack_lockfile = NULL;
+	return NULL;
 }
 
 static const char *unpack_with_sideband(void)
diff --git a/cache.h b/cache.h
index 85b544f..0fff958 100644
--- a/cache.h
+++ b/cache.h
@@ -789,6 +789,7 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type,
 extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
 extern int force_object_loose(const unsigned char *sha1, time_t mtime);
+extern int maybe_unpack_objects(const char *pack_lockfile, int limit);
 extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
 extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
diff --git a/fetch-pack.c b/fetch-pack.c
index f5d99c1..9c81a15 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -687,9 +687,7 @@ static int get_pack(struct fetch_pack_args *args,
 	struct async demux;
 	const char *argv[22];
 	char keep_arg[256];
-	char hdr_arg[256];
 	const char **av;
-	int do_keep = args->keep_pack;
 	struct child_process cmd;
 	int ret;
 
@@ -711,54 +709,29 @@ static int get_pack(struct fetch_pack_args *args,
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.argv = argv;
+	cmd.out = -1;
 	av = argv;
-	*hdr_arg = 0;
-	if (!args->keep_pack && unpack_limit) {
-		struct pack_header header;
-
-		if (read_pack_header(demux.out, &header))
-			die("protocol error: bad pack header");
-		snprintf(hdr_arg, sizeof(hdr_arg),
-			 "--pack_header=%"PRIu32",%"PRIu32,
-			 ntohl(header.hdr_version), ntohl(header.hdr_entries));
-		if (ntohl(header.hdr_entries) < unpack_limit)
-			do_keep = 0;
-		else
-			do_keep = 1;
-	}
 
 	if (alternate_shallow_file) {
 		*av++ = "--shallow-file";
 		*av++ = alternate_shallow_file;
 	}
 
-	if (do_keep) {
-		if (pack_lockfile)
-			cmd.out = -1;
-		*av++ = "index-pack";
-		*av++ = "--stdin";
-		if (!args->quiet && !args->no_progress)
-			*av++ = "-v";
-		if (args->use_thin_pack)
-			*av++ = "--fix-thin";
-		if (args->lock_pack || unpack_limit) {
-			int s = sprintf(keep_arg,
-					"--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
-			if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
-				strcpy(keep_arg + s, "localhost");
-			*av++ = keep_arg;
-		}
-		if (args->check_self_contained_and_connected)
-			*av++ = "--check-self-contained-and-connected";
-	}
-	else {
-		*av++ = "unpack-objects";
-		if (args->quiet || args->no_progress)
-			*av++ = "-q";
-		args->check_self_contained_and_connected = 0;
-	}
-	if (*hdr_arg)
-		*av++ = hdr_arg;
+	*av++ = "index-pack";
+	*av++ = "--stdin";
+	if (!args->quiet && !args->no_progress)
+		*av++ = "-v";
+	if (args->use_thin_pack)
+		*av++ = "--fix-thin";
+	if (args->lock_pack || unpack_limit) {
+		int s = sprintf(keep_arg,
+				"--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
+		if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+			strcpy(keep_arg + s, "localhost");
+		*av++ = keep_arg;
+	}
+	if (args->check_self_contained_and_connected)
+		*av++ = "--check-self-contained-and-connected";
 	if (fetch_fsck_objects >= 0
 	    ? fetch_fsck_objects
 	    : transfer_fsck_objects >= 0
@@ -771,10 +744,8 @@ static int get_pack(struct fetch_pack_args *args,
 	cmd.git_cmd = 1;
 	if (start_command(&cmd))
 		die("fetch-pack: unable to fork off %s", argv[0]);
-	if (do_keep && pack_lockfile) {
-		*pack_lockfile = index_pack_lockfile(cmd.out);
-		close(cmd.out);
-	}
+	*pack_lockfile = index_pack_lockfile(cmd.out);
+	close(cmd.out);
 
 	ret = finish_command(&cmd);
 	if (!ret || (args->check_self_contained_and_connected && ret == 1))
@@ -820,11 +791,12 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 				 int fd[2],
 				 const struct ref *orig_ref,
 				 struct ref **sought, int nr_sought,
-				 char **pack_lockfile)
+				 char **pack_lockfile_p)
 {
 	struct ref *ref = copy_ref_list(orig_ref);
 	unsigned char sha1[20];
 	const char *agent_feature;
+	char *pack_lockfile = NULL;
 	int agent_len;
 
 	sort_ref_list(&ref, ref_compare_name);
@@ -899,9 +871,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 		setup_alternate_shallow();
 	else
 		alternate_shallow_file = NULL;
-	if (get_pack(args, fd, pack_lockfile))
+	if (get_pack(args, fd, &pack_lockfile))
 		die("git fetch-pack: fetch failed.");
 
+	if (!maybe_unpack_objects(pack_lockfile,
+				  args->keep_pack ? 0 : unpack_limit))
+		pack_lockfile = NULL;
+	if (pack_lockfile_p)
+		*pack_lockfile_p = pack_lockfile;
+
  all_done:
 	return ref;
 }
@@ -997,6 +975,5 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 			commit_lock_file(&shallow_lock);
 	}
 
-	reprepare_packed_git();
 	return ref_cpy;
 }
diff --git a/sha1_file.c b/sha1_file.c
index 8e27db1..a0cdeae 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -61,6 +61,8 @@ static struct cached_object empty_tree = {
 };
 
 static struct packed_git *last_found_pack;
+/* temporarily skip one pack in find_pack_entry() */
+static struct packed_git *skip_this_pack;
 
 static struct cached_object *find_cached_object(const unsigned char *sha1)
 {
@@ -2376,11 +2378,15 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack &&
+	    last_found_pack != skip_this_pack &&
+	    fill_pack_entry(sha1, e, last_found_pack))
 		return 1;
 
 	for (p = packed_git; p; p = p->next) {
-		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+		if (p == last_found_pack ||
+		    p == skip_this_pack ||
+		    !fill_pack_entry(sha1, e, p))
 			continue;
 
 		last_found_pack = p;
@@ -3133,3 +3139,63 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
 		die("%s is not a valid '%s' object", sha1_to_hex(sha1),
 		    typename(expect));
 }
+
+static int has_sha1_except_in(struct packed_git *p,
+			      const unsigned char *sha1)
+{
+	int has;
+	skip_this_pack = p;
+	has = has_sha1_file(sha1);
+	skip_this_pack = NULL;
+	return has;
+}
+
+/*
+ * Unpack objects if the number of objects in the pack is lower than
+ * specified limit. Otherwise make sure the new pack is registered
+ * (including when pack_lockfile is NULL). Return 0 is the pack is
+ * removed.
+ */
+int maybe_unpack_objects(const char *pack_lockfile, int limit)
+{
+	const char *ext[] = { ".pack", ".idx", ".keep", NULL };
+	struct packed_git *p;
+	char *path;
+	int len, ret = 0;
+	uint32_t i;
+
+	reprepare_packed_git();
+	if (!pack_lockfile)
+		return -1;
+
+	/* must be enough for any extensions in ext[] */
+	path = xstrdup(pack_lockfile);
+	len = strlen(pack_lockfile) - strlen(".keep");
+	strcpy(path + len, ".pack");
+	for (p = packed_git; p; p = p->next)
+		if (!strcmp(p->pack_name, path))
+			break;
+	if (!p || open_pack_index(p))
+		die("unable to find pack %s", path);
+
+	if (p->num_objects >= limit) {
+		free(path);
+		return -1;
+	}
+
+	for (i = 0; i < p->num_objects; i++) {
+		const unsigned char *sha1 = nth_packed_object_sha1(p, i);
+		if (!has_sha1_except_in(p, sha1)) /* skip --fix-thin objects */
+			ret |= force_object_loose(sha1, p->mtime);
+	}
+
+	if (!ret) {
+		free_pack_by_name(p->pack_name);
+		for (i = 0; ext[i]; i++) {
+			strcpy(path + len, ext[i]);
+			unlink_or_warn(path);
+		}
+	}
+	free(path);
+	return ret;
+}
-- 
1.8.2.83.gc99314b

             reply	other threads:[~2013-09-02  3:02 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-02  3:05 Nguyễn Thái Ngọc Duy [this message]
2013-09-02  4:38 ` [PATCH] {fetch,receive}-pack: drop unpack-objects, delay loosing objects until the end Eric Sunshine
2013-09-03  6:49 ` Jeff King
2013-09-03 11:56   ` Duy Nguyen
2013-09-03 17:25     ` Jeff King
2013-09-06  0:46 ` [PATCH v2 1/3] index-pack: add --unpack-limit to unpack objects Nguyễn Thái Ngọc Duy
2013-09-06  0:46   ` [PATCH v2 2/3] fetch-pack: use index-pack --unpack-limit instead of unpack-objects Nguyễn Thái Ngọc Duy
2013-09-08  4:45     ` Jeff King
2013-09-06  0:46   ` [PATCH v2 3/3] receive-pack: " Nguyễn Thái Ngọc Duy
2013-09-08  4:44   ` [PATCH v2 1/3] index-pack: add --unpack-limit to unpack objects Jeff King
2013-09-08  6:28     ` Duy Nguyen
2013-09-08  6:35       ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1378091107-31682-1-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.