git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] Perform cheaper connectivity check when pack is used as medium
Date: Mon, 27 Feb 2012 22:39:51 +0700	[thread overview]
Message-ID: <1330357191-32011-1-git-send-email-pclouds@gmail.com> (raw)

When we fetch or push, usually "git rev-list --verify-objects --not
--all --stdin" is used to make sure that there are no gaps between
existing refs and new refs. --verify-objects calls parse_object(),
which internally calls check_sha1_signature() to verify object content
matches its SHA-1 signature.

check_sha1_signature() is an expensive operation, especially when new
refs are far away from existing ones because all objects in between
are re-hashed. However, if we receive new objects by pack, we can
skip the operation because packs themselves do not contain SHA-1
signatures. All signatures are recreated by index-pack's hashing the
pack, which we can trust.

Detect pack transfer cases and turn --verify-objects to --objects.
--objects is similar to --verify-objects except that it does not call
check_sha1_signature().

As an (extreme) example, a repository is created with only one commit:
e83c516 (Initial revision of "git", the information manager from hell
- 2005-04-07). The rest of git.git is fetched on top. Without the
patch:

$ time git fetch file:///home/pclouds/w/git/.git
remote: Counting objects: 125638, done.
remote: Compressing objects: 100% (33201/33201), done.
remote: Total 125638 (delta 92568), reused 123517 (delta 90743)
Receiving objects: 100% (125638/125638), 34.58 MiB | 8.07 MiB/s, done.
Resolving deltas: 100% (92568/92568), done.
From file:///home/pclouds/w/git/
 * branch            HEAD       -> FETCH_HEAD

real    1m30.972s
user    1m31.410s
sys     0m1.757s

With the patch:

$ time git fetch file:///home/pclouds/w/git/.git
remote: Counting objects: 125647, done.
remote: Compressing objects: 100% (33209/33209), done.
remote: Total 125647 (delta 92576), reused 123516 (delta 90744)
Receiving objects: 100% (125647/125647), 34.58 MiB | 7.99 MiB/s, done.
Resolving deltas: 100% (92576/92576), done.
From file:///home/pclouds/w/git/
 * branch            HEAD       -> FETCH_HEAD

real    0m51.456s
user    0m52.737s
sys     0m1.548s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/fetch.c        |   12 +++++++-----
 builtin/receive-pack.c |    4 ++--
 connected.c            |    5 ++++-
 connected.h            |    3 ++-
 transport.c            |    5 +++++
 transport.h            |    1 +
 6 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 65f5f9b..2b62f42 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -368,7 +368,7 @@ static int iterate_ref_map(void *cb_data, unsigned char sha1[20])
 }
 
 static int store_updated_refs(const char *raw_url, const char *remote_name,
-		struct ref *ref_map)
+			      struct ref *ref_map, int pack_transport)
 {
 	FILE *fp;
 	struct commit *commit;
@@ -389,7 +389,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
 		url = xstrdup("foreign");
 
 	rm = ref_map;
-	if (check_everything_connected(iterate_ref_map, 0, &rm)) {
+	if (check_everything_connected(iterate_ref_map, 0,
+				       pack_transport ? 0 : 1, &rm)) {
 		rc = error(_("%s did not send all necessary objects\n"), url);
 		goto abort;
 	}
@@ -516,7 +517,7 @@ static int quickfetch(struct ref *ref_map)
 	 */
 	if (depth)
 		return -1;
-	return check_everything_connected(iterate_ref_map, 1, &rm);
+	return check_everything_connected(iterate_ref_map, 1, 0, &rm);
 }
 
 static int fetch_refs(struct transport *transport, struct ref *ref_map)
@@ -526,8 +527,9 @@ static int fetch_refs(struct transport *transport, struct ref *ref_map)
 		ret = transport_fetch_refs(transport, ref_map);
 	if (!ret)
 		ret |= store_updated_refs(transport->url,
-				transport->remote->name,
-				ref_map);
+					  transport->remote->name,
+					  ref_map,
+					  is_pack_transport(transport));
 	transport_unlock_pack(transport);
 	return ret;
 }
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 0afb8b2..5935751 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -669,7 +669,7 @@ static void set_connectivity_errors(struct command *commands)
 	for (cmd = commands; cmd; cmd = cmd->next) {
 		struct command *singleton = cmd;
 		if (!check_everything_connected(command_singleton_iterator,
-						0, &singleton))
+						0, 0, &singleton))
 			continue;
 		cmd->error_string = "missing necessary objects";
 	}
@@ -705,7 +705,7 @@ static void execute_commands(struct command *commands, const char *unpacker_erro
 
 	cmd = commands;
 	if (check_everything_connected(iterate_receive_command_list,
-				       0, &cmd))
+				       0, 0, &cmd))
 		set_connectivity_errors(commands);
 
 	if (run_receive_hook(commands, pre_receive_hook, 0)) {
diff --git a/connected.c b/connected.c
index d762423..6ebd330 100644
--- a/connected.c
+++ b/connected.c
@@ -14,7 +14,8 @@
  *
  * Returns 0 if everything is connected, non-zero otherwise.
  */
-int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
+int check_everything_connected(sha1_iterate_fn fn, int quiet,
+			       int strict, void *cb_data)
 {
 	struct child_process rev_list;
 	const char *argv[] = {"rev-list", "--verify-objects",
@@ -26,6 +27,8 @@ int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
 	if (fn(cb_data, sha1))
 		return err;
 
+	if (!strict)
+		argv[1] = "--objects";
 	if (quiet)
 		argv[5] = "--quiet";
 
diff --git a/connected.h b/connected.h
index 7e4585a..1f191da 100644
--- a/connected.h
+++ b/connected.h
@@ -15,6 +15,7 @@ typedef int (*sha1_iterate_fn)(void *, unsigned char [20]);
  *
  * Return 0 if Ok, non zero otherwise (i.e. some missing objects)
  */
-extern int check_everything_connected(sha1_iterate_fn, int quiet, void *cb_data);
+extern int check_everything_connected(sha1_iterate_fn, int quiet,
+				      int strict, void *cb_data);
 
 #endif /* CONNECTED_H */
diff --git a/transport.c b/transport.c
index 181f8f2..cd5e0ca 100644
--- a/transport.c
+++ b/transport.c
@@ -1248,3 +1248,8 @@ void for_each_alternate_ref(alternate_ref_fn fn, void *data)
 	cb.data = data;
 	foreach_alt_odb(refs_from_alternate_cb, &cb);
 }
+
+int is_pack_transport(const struct transport *transport)
+{
+	return transport->fetch == fetch_refs_via_pack;
+}
diff --git a/transport.h b/transport.h
index ce99ef8..7cf72ff 100644
--- a/transport.h
+++ b/transport.h
@@ -150,6 +150,7 @@ int transport_disconnect(struct transport *transport);
 char *transport_anonymize_url(const char *url);
 void transport_take_over(struct transport *transport,
 			 struct child_process *child);
+int is_pack_transport(const struct transport *transport);
 
 int transport_connect(struct transport *transport, const char *name,
 		      const char *exec, int fd[2]);
-- 
1.7.8.36.g69ee2

             reply	other threads:[~2012-02-27 15:40 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-27 15:39 Nguyễn Thái Ngọc Duy [this message]
2012-02-27 18:14 ` [PATCH] Perform cheaper connectivity check when pack is used as medium Junio C Hamano
2012-02-28 13:18   ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2012-02-28 15:40     ` Johannes Sixt
2012-02-28 15:47       ` Andreas Ericsson
2012-02-29  1:41       ` Nguyen Thai Ngoc Duy
2012-03-02  6:10     ` Junio C Hamano
2012-03-02 14:05       ` Nguyen Thai Ngoc Duy
2012-03-02 17:31         ` Junio C Hamano
2012-03-03  3:05           ` Nguyen Thai Ngoc Duy
2012-03-03  6:59             ` Junio C Hamano
2012-03-03 10:09               ` Nguyen Thai Ngoc Duy
2012-03-06 11:20               ` Nguyen Thai Ngoc Duy
2012-03-06 15:56                 ` Junio C Hamano
2012-03-14 14:40               ` [PATCH] {fetch,receive}-pack: skip sha-1 integrity test on objects from new pack Nguyễn Thái Ngọc Duy
2012-03-14 19:05                 ` Junio C Hamano
2012-03-15 21:09                 ` Junio C Hamano
2012-03-15 21:57                   ` Junio C Hamano
2012-03-16  2:28                     ` Nguyen Thai Ngoc Duy
2012-03-16  3:42                       ` Junio C Hamano
2012-03-16  3:42                         ` Nguyen Thai Ngoc Duy
2012-03-16  2:24                   ` Nguyen Thai Ngoc Duy
2012-03-16  3:46                     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1330357191-32011-1-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).