From: Junio C Hamano <gitster@pobox.com>
To: git@vger.kernel.org
Subject: [PATCH 3/3] fetch: verify we have everything we need before updating our ref
Date: Thu, 1 Sep 2011 15:43:35 -0700 [thread overview]
Message-ID: <1314917015-3587-4-git-send-email-gitster@pobox.com> (raw)
In-Reply-To: <1314917015-3587-1-git-send-email-gitster@pobox.com>
The "git fetch" command works in two phases. The remote side tells us what
objects are at the tip of the refs we are fetching from, and transfers the
objects missing from our side. After storing the objects in our repository,
we update our remote tracking branches to point at the updated tips of the
refs.
A broken or malicious remote side could send a perfectly well-formed pack
data during the object transfer phase, but there is no guarantee that the
given data actually fill the gap between the objects we originally had and
the refs we are updating to.
Although this kind of breakage can be caught by running fsck after a
fetch, it is much cheaper to verify that everything that is reachable from
the tips of the refs we fetched are indeed fully connected to the tips of
our current set of refs before we update them.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
builtin/fetch.c | 119 +++++++++++++++++++++++++++++--------------------------
1 files changed, 63 insertions(+), 56 deletions(-)
diff --git a/builtin/fetch.c b/builtin/fetch.c
index f9c41da..bdb03ff 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -345,6 +345,64 @@ static int update_local_ref(struct ref *ref,
}
}
+/*
+ * The ref_map records the tips of the refs we are fetching. If
+ *
+ * $ git rev-list --verify-objects --stdin --not --all
+ *
+ * (feeding all the refs in ref_map on its standard input) does not
+ * error out, that means everything reachable from these updated refs
+ * locally exists and is connected to some of our existing refs.
+ *
+ * Returns 0 if everything is connected, non-zero otherwise.
+ */
+static int check_everything_connected(struct ref *ref_map, int quiet)
+{
+ struct child_process rev_list;
+ const char *argv[] = {"rev-list", "--verify-objects",
+ "--stdin", "--not", "--all", NULL, NULL};
+ char commit[41];
+ struct ref *ref;
+ int err = 0;
+
+ if (!ref_map)
+ return 0;
+
+ if (quiet)
+ argv[5] = "--quiet";
+
+ memset(&rev_list, 0, sizeof(rev_list));
+ rev_list.argv = argv;
+ rev_list.git_cmd = 1;
+ rev_list.in = -1;
+ rev_list.no_stdout = 1;
+ rev_list.no_stderr = quiet;
+ if (start_command(&rev_list))
+ return error(_("Could not run 'git rev-list'"));
+
+ sigchain_push(SIGPIPE, SIG_IGN);
+
+ memcpy(commit + 40, "\n", 2);
+ for (ref = ref_map; ref; ref = ref->next) {
+ memcpy(commit, sha1_to_hex(ref->old_sha1), 40);
+ if (write_in_full(rev_list.in, commit, 41) < 0) {
+ if (errno != EPIPE && errno != EINVAL)
+ error(_("failed write to rev-list: %s"),
+ strerror(errno));
+ err = -1;
+ break;
+ }
+ }
+ if (close(rev_list.in)) {
+ error(_("failed to close rev-list's stdin: %s"), strerror(errno));
+ err = -1;
+ }
+
+ sigchain_pop(SIGPIPE);
+
+ return finish_command(&rev_list) || err;
+}
+
static int store_updated_refs(const char *raw_url, const char *remote_name,
struct ref *ref_map)
{
@@ -364,6 +422,10 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
url = transport_anonymize_url(raw_url);
else
url = xstrdup("foreign");
+
+ if (check_everything_connected(ref_map, 0))
+ return error(_("%s did not send all necessary objects\n"), url);
+
for (rm = ref_map; rm; rm = rm->next) {
struct ref *ref = NULL;
@@ -457,24 +519,9 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
* We would want to bypass the object transfer altogether if
* everything we are going to fetch already exists and is connected
* locally.
- *
- * The refs we are going to fetch are in ref_map. If running
- *
- * $ git rev-list --objects --stdin --not --all
- *
- * (feeding all the refs in ref_map on its standard input)
- * does not error out, that means everything reachable from the
- * refs we are going to fetch exists and is connected to some of
- * our existing refs.
*/
static int quickfetch(struct ref *ref_map)
{
- struct child_process revlist;
- struct ref *ref;
- int err;
- const char *argv[] = {"rev-list",
- "--quiet", "--objects", "--stdin", "--not", "--all", NULL};
-
/*
* If we are deepening a shallow clone we already have these
* objects reachable. Running rev-list here will return with
@@ -484,47 +531,7 @@ static int quickfetch(struct ref *ref_map)
*/
if (depth)
return -1;
-
- if (!ref_map)
- return 0;
-
- memset(&revlist, 0, sizeof(revlist));
- revlist.argv = argv;
- revlist.git_cmd = 1;
- revlist.no_stdout = 1;
- revlist.no_stderr = 1;
- revlist.in = -1;
-
- err = start_command(&revlist);
- if (err) {
- error(_("could not run rev-list"));
- return err;
- }
-
- /*
- * If rev-list --stdin encounters an unknown commit, it terminates,
- * which will cause SIGPIPE in the write loop below.
- */
- sigchain_push(SIGPIPE, SIG_IGN);
-
- for (ref = ref_map; ref; ref = ref->next) {
- if (write_in_full(revlist.in, sha1_to_hex(ref->old_sha1), 40) < 0 ||
- write_str_in_full(revlist.in, "\n") < 0) {
- if (errno != EPIPE && errno != EINVAL)
- error(_("failed write to rev-list: %s"), strerror(errno));
- err = -1;
- break;
- }
- }
-
- if (close(revlist.in)) {
- error(_("failed to close rev-list's stdin: %s"), strerror(errno));
- err = -1;
- }
-
- sigchain_pop(SIGPIPE);
-
- return finish_command(&revlist) || err;
+ return check_everything_connected(ref_map, 1);
}
static int fetch_refs(struct transport *transport, struct ref *ref_map)
--
1.7.7.rc0.72.g4b5ea
next prev parent reply other threads:[~2011-09-01 22:43 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-09-01 17:53 Funnies with "git fetch" Junio C Hamano
2011-09-01 22:42 ` Junio C Hamano
2011-09-01 23:31 ` Jeff King
2011-09-02 3:09 ` Junio C Hamano
2011-09-02 3:28 ` Junio C Hamano
2011-09-02 5:03 ` Jeff King
2011-09-01 22:43 ` [PATCH 0/3] Verify the objects fetch obtained before updating ref Junio C Hamano
2011-09-01 22:43 ` [PATCH 1/3] list-objects: pass callback data to show_objects() Junio C Hamano
2011-09-01 22:43 ` [PATCH 2/3] rev-list --verify-object Junio C Hamano
2011-09-01 22:43 ` Junio C Hamano [this message]
2011-09-02 3:55 ` [PATCH 3/3] fetch: verify we have everything we need before updating our ref Nguyen Thai Ngoc Duy
2011-09-02 4:25 ` Junio C Hamano
2011-09-02 23:14 ` Junio C Hamano
2011-09-04 19:15 ` Funnies with "git fetch" Junio C Hamano
2011-09-05 2:21 ` [PATCH 0/3] Add fetch.fsckobjects Junio C Hamano
2011-09-05 2:21 ` [PATCH 1/3] fetch.fsckobjects: verify downloaded objects Junio C Hamano
2011-09-05 2:21 ` [PATCH 2/3] transfer.fsckobjects: unify fetch/receive.fsckobjects Junio C Hamano
2011-09-05 2:21 ` [PATCH 3/3] test: fetch/receive with fsckobjects Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1314917015-3587-4-git-send-email-gitster@pobox.com \
--to=gitster@pobox.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).