All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] Perform cheaper connectivity check when pack is used as medium
Date: Mon, 27 Feb 2012 22:39:51 +0700	[thread overview]
Message-ID: <1330357191-32011-1-git-send-email-pclouds@gmail.com> (raw)

When we fetch or push, usually "git rev-list --verify-objects --not
--all --stdin" is used to make sure that there are no gaps between
existing refs and new refs. --verify-objects calls parse_object(),
which internally calls check_sha1_signature() to verify that each
object's content matches its SHA-1 signature.

check_sha1_signature() is an expensive operation, especially when new
refs are far away from existing ones, because all objects in between
are re-hashed. However, if we receive new objects in a pack, we can
skip the operation because packs themselves do not contain SHA-1
signatures. All signatures are recomputed by index-pack as it hashes
the pack, and we can trust that result.

Detect pack transfer cases and turn --verify-objects into --objects.
--objects is similar to --verify-objects except that it does not call
check_sha1_signature().

As an (extreme) example, take a repository created with only one commit,
e83c516 (Initial revision of "git", the information manager from hell
- 2005-04-07), and fetch the rest of git.git on top of it. Without the
patch:

$ time git fetch file:///home/pclouds/w/git/.git
remote: Counting objects: 125638, done.
remote: Compressing objects: 100% (33201/33201), done.
remote: Total 125638 (delta 92568), reused 123517 (delta 90743)
Receiving objects: 100% (125638/125638), 34.58 MiB | 8.07 MiB/s, done.
Resolving deltas: 100% (92568/92568), done.
From file:///home/pclouds/w/git/
 * branch            HEAD       -> FETCH_HEAD

real    1m30.972s
user    1m31.410s
sys     0m1.757s

With the patch:

$ time git fetch file:///home/pclouds/w/git/.git
remote: Counting objects: 125647, done.
remote: Compressing objects: 100% (33209/33209), done.
remote: Total 125647 (delta 92576), reused 123516 (delta 90744)
Receiving objects: 100% (125647/125647), 34.58 MiB | 7.99 MiB/s, done.
Resolving deltas: 100% (92576/92576), done.
From file:///home/pclouds/w/git/
 * branch            HEAD       -> FETCH_HEAD

real    0m51.456s
user    0m52.737s
sys     0m1.548s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/fetch.c        |   12 +++++++-----
 builtin/receive-pack.c |    4 ++--
 connected.c            |    5 ++++-
 connected.h            |    3 ++-
 transport.c            |    5 +++++
 transport.h            |    1 +
 6 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 65f5f9b..2b62f42 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -368,7 +368,7 @@ static int iterate_ref_map(void *cb_data, unsigned char sha1[20])
 }
 
 static int store_updated_refs(const char *raw_url, const char *remote_name,
-		struct ref *ref_map)
+			      struct ref *ref_map, int pack_transport)
 {
 	FILE *fp;
 	struct commit *commit;
@@ -389,7 +389,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
 		url = xstrdup("foreign");
 
 	rm = ref_map;
-	if (check_everything_connected(iterate_ref_map, 0, &rm)) {
+	if (check_everything_connected(iterate_ref_map, 0,
+				       pack_transport ? 0 : 1, &rm)) {
 		rc = error(_("%s did not send all necessary objects\n"), url);
 		goto abort;
 	}
@@ -516,7 +517,7 @@ static int quickfetch(struct ref *ref_map)
 	 */
 	if (depth)
 		return -1;
-	return check_everything_connected(iterate_ref_map, 1, &rm);
+	return check_everything_connected(iterate_ref_map, 1, 0, &rm);
 }
 
 static int fetch_refs(struct transport *transport, struct ref *ref_map)
@@ -526,8 +527,9 @@ static int fetch_refs(struct transport *transport, struct ref *ref_map)
 		ret = transport_fetch_refs(transport, ref_map);
 	if (!ret)
 		ret |= store_updated_refs(transport->url,
-				transport->remote->name,
-				ref_map);
+					  transport->remote->name,
+					  ref_map,
+					  is_pack_transport(transport));
 	transport_unlock_pack(transport);
 	return ret;
 }
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 0afb8b2..5935751 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -669,7 +669,7 @@ static void set_connectivity_errors(struct command *commands)
 	for (cmd = commands; cmd; cmd = cmd->next) {
 		struct command *singleton = cmd;
 		if (!check_everything_connected(command_singleton_iterator,
-						0, &singleton))
+						0, 0, &singleton))
 			continue;
 		cmd->error_string = "missing necessary objects";
 	}
@@ -705,7 +705,7 @@ static void execute_commands(struct command *commands, const char *unpacker_erro
 
 	cmd = commands;
 	if (check_everything_connected(iterate_receive_command_list,
-				       0, &cmd))
+				       0, 0, &cmd))
 		set_connectivity_errors(commands);
 
 	if (run_receive_hook(commands, pre_receive_hook, 0)) {
diff --git a/connected.c b/connected.c
index d762423..6ebd330 100644
--- a/connected.c
+++ b/connected.c
@@ -14,7 +14,8 @@
  *
  * Returns 0 if everything is connected, non-zero otherwise.
  */
-int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
+int check_everything_connected(sha1_iterate_fn fn, int quiet,
+			       int strict, void *cb_data)
 {
 	struct child_process rev_list;
 	const char *argv[] = {"rev-list", "--verify-objects",
@@ -26,6 +27,8 @@ int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
 	if (fn(cb_data, sha1))
 		return err;
 
+	if (!strict)
+		argv[1] = "--objects";
 	if (quiet)
 		argv[5] = "--quiet";
 
diff --git a/connected.h b/connected.h
index 7e4585a..1f191da 100644
--- a/connected.h
+++ b/connected.h
@@ -15,6 +15,7 @@ typedef int (*sha1_iterate_fn)(void *, unsigned char [20]);
  *
  * Return 0 if Ok, non zero otherwise (i.e. some missing objects)
  */
-extern int check_everything_connected(sha1_iterate_fn, int quiet, void *cb_data);
+extern int check_everything_connected(sha1_iterate_fn, int quiet,
+				      int strict, void *cb_data);
 
 #endif /* CONNECTED_H */
diff --git a/transport.c b/transport.c
index 181f8f2..cd5e0ca 100644
--- a/transport.c
+++ b/transport.c
@@ -1248,3 +1248,8 @@ void for_each_alternate_ref(alternate_ref_fn fn, void *data)
 	cb.data = data;
 	foreach_alt_odb(refs_from_alternate_cb, &cb);
 }
+
+int is_pack_transport(const struct transport *transport)
+{
+	return transport->fetch == fetch_refs_via_pack;
+}
diff --git a/transport.h b/transport.h
index ce99ef8..7cf72ff 100644
--- a/transport.h
+++ b/transport.h
@@ -150,6 +150,7 @@ int transport_disconnect(struct transport *transport);
 char *transport_anonymize_url(const char *url);
 void transport_take_over(struct transport *transport,
 			 struct child_process *child);
+int is_pack_transport(const struct transport *transport);
 
 int transport_connect(struct transport *transport, const char *name,
 		      const char *exec, int fd[2]);
-- 
1.7.8.36.g69ee2

             reply	other threads:[~2012-02-27 15:40 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-27 15:39 Nguyễn Thái Ngọc Duy [this message]
2012-02-27 18:14 ` [PATCH] Perform cheaper connectivity check when pack is used as medium Junio C Hamano
2012-02-28 13:18   ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2012-02-28 15:40     ` Johannes Sixt
2012-02-28 15:47       ` Andreas Ericsson
2012-02-29  1:41       ` Nguyen Thai Ngoc Duy
2012-03-02  6:10     ` Junio C Hamano
2012-03-02 14:05       ` Nguyen Thai Ngoc Duy
2012-03-02 17:31         ` Junio C Hamano
2012-03-03  3:05           ` Nguyen Thai Ngoc Duy
2012-03-03  6:59             ` Junio C Hamano
2012-03-03 10:09               ` Nguyen Thai Ngoc Duy
2012-03-06 11:20               ` Nguyen Thai Ngoc Duy
2012-03-06 15:56                 ` Junio C Hamano
2012-03-14 14:40               ` [PATCH] {fetch,receive}-pack: skip sha-1 integrity test on objects from new pack Nguyễn Thái Ngọc Duy
2012-03-14 19:05                 ` Junio C Hamano
2012-03-15 21:09                 ` Junio C Hamano
2012-03-15 21:57                   ` Junio C Hamano
2012-03-16  2:28                     ` Nguyen Thai Ngoc Duy
2012-03-16  3:42                       ` Junio C Hamano
2012-03-16  3:42                         ` Nguyen Thai Ngoc Duy
2012-03-16  2:24                   ` Nguyen Thai Ngoc Duy
2012-03-16  3:46                     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1330357191-32011-1-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.