From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] {fetch,receive}-pack: drop unpack-objects, delay loosing objects until the end
Date: Mon, 2 Sep 2013 10:05:07 +0700 [thread overview]
Message-ID: <1378091107-31682-1-git-send-email-pclouds@gmail.com> (raw)
Current code peaks into the transfered pack's header, if the number of
objects is under a limit, unpack-objects is called to handle the rest,
otherwise index-pack is. This patch makes fetch-pack use index-pack
unconditionally, then turn objects loose and remove the pack at the
end. unpack-objects is deprecated and may be removed in future.
There are a few reasons for this:
- down to two code paths to maintain regarding pack reading
(sha1_file.c and index-pack.c). When .pack v4 comes, we don't need
to duplicate work in index-pack and unpack-objects.
- the number of objects should not be the only indicator for
unpacking. If there are a few large objects in the pack, the pack
should be kept anyway. Keeping the pack lets us examine the whole
pack later and make a better decision.
- by going through index-pack first, then unpack, we pay extra cost
for completing a thin pack into a full one. But compared to fetch's
total time, it should not be noticeable because unpack-objects is
only called when the pack contains a small number of objects.
- unpack-objects does not support streaming large blobs. Granted
force_object_loose(), which is used by this patch, does not either.
But it'll be easier to do so because sha1_file.c interface supports
streaming.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
builtin/receive-pack.c | 120 +++++++++++++------------------------------------
cache.h | 1 +
fetch-pack.c | 77 +++++++++++--------------------
sha1_file.c | 70 ++++++++++++++++++++++++++++-
4 files changed, 128 insertions(+), 140 deletions(-)
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index e3eb5fc..6eb4ffb 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -792,105 +792,49 @@ static struct command *read_head_info(void)
return commands;
}
-static const char *parse_pack_header(struct pack_header *hdr)
-{
- switch (read_pack_header(0, hdr)) {
- case PH_ERROR_EOF:
- return "eof before pack header was fully read";
-
- case PH_ERROR_PACK_SIGNATURE:
- return "protocol error (pack signature mismatch detected)";
-
- case PH_ERROR_PROTOCOL:
- return "protocol error (pack version unsupported)";
-
- default:
- return "unknown error in parse_pack_header";
-
- case 0:
- return NULL;
- }
-}
-
static const char *pack_lockfile;
static const char *unpack(int err_fd)
{
- struct pack_header hdr;
- const char *hdr_err;
- char hdr_arg[38];
int fsck_objects = (receive_fsck_objects >= 0
? receive_fsck_objects
: transfer_fsck_objects >= 0
? transfer_fsck_objects
: 0);
- hdr_err = parse_pack_header(&hdr);
- if (hdr_err) {
- if (err_fd > 0)
- close(err_fd);
- return hdr_err;
- }
- snprintf(hdr_arg, sizeof(hdr_arg),
- "--pack_header=%"PRIu32",%"PRIu32,
- ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries));
-
- if (ntohl(hdr.hdr_entries) < unpack_limit) {
- int code, i = 0;
- struct child_process child;
- const char *unpacker[5];
- unpacker[i++] = "unpack-objects";
- if (quiet)
- unpacker[i++] = "-q";
- if (fsck_objects)
- unpacker[i++] = "--strict";
- unpacker[i++] = hdr_arg;
- unpacker[i++] = NULL;
- memset(&child, 0, sizeof(child));
- child.argv = unpacker;
- child.no_stdout = 1;
- child.err = err_fd;
- child.git_cmd = 1;
- code = run_command(&child);
- if (!code)
- return NULL;
- return "unpack-objects abnormal exit";
- } else {
- const char *keeper[7];
- int s, status, i = 0;
- char keep_arg[256];
- struct child_process ip;
-
- s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid());
- if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
- strcpy(keep_arg + s, "localhost");
-
- keeper[i++] = "index-pack";
- keeper[i++] = "--stdin";
- if (fsck_objects)
- keeper[i++] = "--strict";
- keeper[i++] = "--fix-thin";
- keeper[i++] = hdr_arg;
- keeper[i++] = keep_arg;
- keeper[i++] = NULL;
- memset(&ip, 0, sizeof(ip));
- ip.argv = keeper;
- ip.out = -1;
- ip.err = err_fd;
- ip.git_cmd = 1;
- status = start_command(&ip);
- if (status) {
- return "index-pack fork failed";
- }
- pack_lockfile = index_pack_lockfile(ip.out);
- close(ip.out);
- status = finish_command(&ip);
- if (!status) {
- reprepare_packed_git();
- return NULL;
- }
+ const char *keeper[7];
+ int s, status, i = 0;
+ char keep_arg[256];
+ struct child_process ip;
+
+ s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid());
+ if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+ strcpy(keep_arg + s, "localhost");
+
+ keeper[i++] = "index-pack";
+ keeper[i++] = "--stdin";
+ if (fsck_objects)
+ keeper[i++] = "--strict";
+ keeper[i++] = "--fix-thin";
+ keeper[i++] = keep_arg;
+ keeper[i++] = NULL;
+ memset(&ip, 0, sizeof(ip));
+ ip.argv = keeper;
+ ip.out = -1;
+ ip.err = err_fd;
+ ip.git_cmd = 1;
+ status = start_command(&ip);
+ if (status) {
+ return "index-pack fork failed";
+ }
+ pack_lockfile = index_pack_lockfile(ip.out);
+ close(ip.out);
+ status = finish_command(&ip);
+ if (status)
return "index-pack abnormal exit";
- }
+ if (!maybe_unpack_objects(pack_lockfile, unpack_limit))
+ pack_lockfile = NULL;
+ return NULL;
}
static const char *unpack_with_sideband(void)
diff --git a/cache.h b/cache.h
index 85b544f..0fff958 100644
--- a/cache.h
+++ b/cache.h
@@ -789,6 +789,7 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type,
extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
extern int force_object_loose(const unsigned char *sha1, time_t mtime);
+extern int maybe_unpack_objects(const char *pack_lockfile, int limit);
extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
diff --git a/fetch-pack.c b/fetch-pack.c
index f5d99c1..9c81a15 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -687,9 +687,7 @@ static int get_pack(struct fetch_pack_args *args,
struct async demux;
const char *argv[22];
char keep_arg[256];
- char hdr_arg[256];
const char **av;
- int do_keep = args->keep_pack;
struct child_process cmd;
int ret;
@@ -711,54 +709,29 @@ static int get_pack(struct fetch_pack_args *args,
memset(&cmd, 0, sizeof(cmd));
cmd.argv = argv;
+ cmd.out = -1;
av = argv;
- *hdr_arg = 0;
- if (!args->keep_pack && unpack_limit) {
- struct pack_header header;
-
- if (read_pack_header(demux.out, &header))
- die("protocol error: bad pack header");
- snprintf(hdr_arg, sizeof(hdr_arg),
- "--pack_header=%"PRIu32",%"PRIu32,
- ntohl(header.hdr_version), ntohl(header.hdr_entries));
- if (ntohl(header.hdr_entries) < unpack_limit)
- do_keep = 0;
- else
- do_keep = 1;
- }
if (alternate_shallow_file) {
*av++ = "--shallow-file";
*av++ = alternate_shallow_file;
}
- if (do_keep) {
- if (pack_lockfile)
- cmd.out = -1;
- *av++ = "index-pack";
- *av++ = "--stdin";
- if (!args->quiet && !args->no_progress)
- *av++ = "-v";
- if (args->use_thin_pack)
- *av++ = "--fix-thin";
- if (args->lock_pack || unpack_limit) {
- int s = sprintf(keep_arg,
- "--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
- if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
- strcpy(keep_arg + s, "localhost");
- *av++ = keep_arg;
- }
- if (args->check_self_contained_and_connected)
- *av++ = "--check-self-contained-and-connected";
- }
- else {
- *av++ = "unpack-objects";
- if (args->quiet || args->no_progress)
- *av++ = "-q";
- args->check_self_contained_and_connected = 0;
- }
- if (*hdr_arg)
- *av++ = hdr_arg;
+ *av++ = "index-pack";
+ *av++ = "--stdin";
+ if (!args->quiet && !args->no_progress)
+ *av++ = "-v";
+ if (args->use_thin_pack)
+ *av++ = "--fix-thin";
+ if (args->lock_pack || unpack_limit) {
+ int s = sprintf(keep_arg,
+ "--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid());
+ if (gethostname(keep_arg + s, sizeof(keep_arg) - s))
+ strcpy(keep_arg + s, "localhost");
+ *av++ = keep_arg;
+ }
+ if (args->check_self_contained_and_connected)
+ *av++ = "--check-self-contained-and-connected";
if (fetch_fsck_objects >= 0
? fetch_fsck_objects
: transfer_fsck_objects >= 0
@@ -771,10 +744,8 @@ static int get_pack(struct fetch_pack_args *args,
cmd.git_cmd = 1;
if (start_command(&cmd))
die("fetch-pack: unable to fork off %s", argv[0]);
- if (do_keep && pack_lockfile) {
- *pack_lockfile = index_pack_lockfile(cmd.out);
- close(cmd.out);
- }
+ *pack_lockfile = index_pack_lockfile(cmd.out);
+ close(cmd.out);
ret = finish_command(&cmd);
if (!ret || (args->check_self_contained_and_connected && ret == 1))
@@ -820,11 +791,12 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
int fd[2],
const struct ref *orig_ref,
struct ref **sought, int nr_sought,
- char **pack_lockfile)
+ char **pack_lockfile_p)
{
struct ref *ref = copy_ref_list(orig_ref);
unsigned char sha1[20];
const char *agent_feature;
+ char *pack_lockfile = NULL;
int agent_len;
sort_ref_list(&ref, ref_compare_name);
@@ -899,9 +871,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
setup_alternate_shallow();
else
alternate_shallow_file = NULL;
- if (get_pack(args, fd, pack_lockfile))
+ if (get_pack(args, fd, &pack_lockfile))
die("git fetch-pack: fetch failed.");
+ if (!maybe_unpack_objects(pack_lockfile,
+ args->keep_pack ? 0 : unpack_limit))
+ pack_lockfile = NULL;
+ if (pack_lockfile_p)
+ *pack_lockfile_p = pack_lockfile;
+
all_done:
return ref;
}
@@ -997,6 +975,5 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
commit_lock_file(&shallow_lock);
}
- reprepare_packed_git();
return ref_cpy;
}
diff --git a/sha1_file.c b/sha1_file.c
index 8e27db1..a0cdeae 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -61,6 +61,8 @@ static struct cached_object empty_tree = {
};
static struct packed_git *last_found_pack;
+/* temporarily skip one pack in find_pack_entry() */
+static struct packed_git *skip_this_pack;
static struct cached_object *find_cached_object(const unsigned char *sha1)
{
@@ -2376,11 +2378,15 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
if (!packed_git)
return 0;
- if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+ if (last_found_pack &&
+ last_found_pack != skip_this_pack &&
+ fill_pack_entry(sha1, e, last_found_pack))
return 1;
for (p = packed_git; p; p = p->next) {
- if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+ if (p == last_found_pack ||
+ p == skip_this_pack ||
+ !fill_pack_entry(sha1, e, p))
continue;
last_found_pack = p;
@@ -3133,3 +3139,63 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}
+
+static int has_sha1_except_in(struct packed_git *p,
+ const unsigned char *sha1)
+{
+ int has;
+ skip_this_pack = p;
+ has = has_sha1_file(sha1);
+ skip_this_pack = NULL;
+ return has;
+}
+
+/*
+ * Unpack objects if the number of objects in the pack is lower than
+ * specified limit. Otherwise make sure the new pack is registered
+ * (including when pack_lockfile is NULL). Return 0 is the pack is
+ * removed.
+ */
+int maybe_unpack_objects(const char *pack_lockfile, int limit)
+{
+ const char *ext[] = { ".pack", ".idx", ".keep", NULL };
+ struct packed_git *p;
+ char *path;
+ int len, ret = 0;
+ uint32_t i;
+
+ reprepare_packed_git();
+ if (!pack_lockfile)
+ return -1;
+
+ /* must be enough for any extensions in ext[] */
+ path = xstrdup(pack_lockfile);
+ len = strlen(pack_lockfile) - strlen(".keep");
+ strcpy(path + len, ".pack");
+ for (p = packed_git; p; p = p->next)
+ if (!strcmp(p->pack_name, path))
+ break;
+ if (!p || open_pack_index(p))
+ die("unable to find pack %s", path);
+
+ if (p->num_objects >= limit) {
+ free(path);
+ return -1;
+ }
+
+ for (i = 0; i < p->num_objects; i++) {
+ const unsigned char *sha1 = nth_packed_object_sha1(p, i);
+ if (!has_sha1_except_in(p, sha1)) /* skip --fix-thin objects */
+ ret |= force_object_loose(sha1, p->mtime);
+ }
+
+ if (!ret) {
+ free_pack_by_name(p->pack_name);
+ for (i = 0; ext[i]; i++) {
+ strcpy(path + len, ext[i]);
+ unlink_or_warn(path);
+ }
+ }
+ free(path);
+ return ret;
+}
--
1.8.2.83.gc99314b
next reply other threads:[~2013-09-02 3:02 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-02 3:05 Nguyễn Thái Ngọc Duy [this message]
2013-09-02 4:38 ` [PATCH] {fetch,receive}-pack: drop unpack-objects, delay loosing objects until the end Eric Sunshine
2013-09-03 6:49 ` Jeff King
2013-09-03 11:56 ` Duy Nguyen
2013-09-03 17:25 ` Jeff King
2013-09-06 0:46 ` [PATCH v2 1/3] index-pack: add --unpack-limit to unpack objects Nguyễn Thái Ngọc Duy
2013-09-06 0:46 ` [PATCH v2 2/3] fetch-pack: use index-pack --unpack-limit instead of unpack-objects Nguyễn Thái Ngọc Duy
2013-09-08 4:45 ` Jeff King
2013-09-06 0:46 ` [PATCH v2 3/3] receive-pack: " Nguyễn Thái Ngọc Duy
2013-09-08 4:44 ` [PATCH v2 1/3] index-pack: add --unpack-limit to unpack objects Jeff King
2013-09-08 6:28 ` Duy Nguyen
2013-09-08 6:35 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1378091107-31682-1-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).