git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org
Subject: [PATCH] pack-objects: re-validate data we copy from elsewhere.
Date: Fri, 01 Sep 2006 16:14:23 -0700	[thread overview]
Message-ID: <7vveo741tc.fsf_-_@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <Pine.LNX.4.64.0609011129270.27779@g5.osdl.org> (Linus Torvalds's message of "Fri, 1 Sep 2006 11:35:23 -0700 (PDT)")

When reusing data from an existing pack and from a new style
loose objects, we used to just copy it staight into the
resulting pack.  Instead make sure they are not corrupt, but
do so only when we are not streaming to stdout, in which case
the receiving end will do the validation either by unpacking
the stream or by constructing the .idx file.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

  Linus Torvalds <torvalds@osdl.org> writes:

  > On Fri, 1 Sep 2006, A Large Angry SCM wrote:
  >> 
  >> Unfortunately, the zlib CRC is of the _uncompressed_ data [1], so
  >> inflating the stream is still necessary to check for corruption.
  >
  > I don't think that is unfortunate.
  >
  > We really should inflate the stream anyway, since not only inflating it, 
  > but also applying any deltas to the base object is really the only way to 
  > verify its correctness for a delta thing. Otherwise, the SHA1 of the base 
  > could be totally corrupt.
  >
  > And once you inflate it and apply all deltas, you obviously also get the 
  > full SHA1 check, so you're _really_ safe.
  >
  > So let's do the really safe thing first, and see if it actually results in 
  > any problems.

  So here is an attempt to do this.

 builtin-pack-objects.c |   64 +++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 46f524d..10ba866 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -65,6 +65,7 @@ static unsigned char pack_file_sha1[20];
 static int progress = 1;
 static volatile sig_atomic_t progress_update;
 static int window = 10;
+static int pack_to_stdout;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -242,6 +243,55 @@ static int encode_header(enum object_typ
 	return n;
 }
 
+static int revalidate_one(struct object_entry *entry,
+			  void *data, char *type, unsigned long size)
+{
+	unsigned char hdr[50];
+	int hdrlen;
+	unsigned char sha1[20];
+
+	if (!data)
+		return -1;
+	if (size != entry->size)
+		return -1;
+	write_sha1_file_prepare(data, size, type, sha1, hdr, &hdrlen);
+	free(data);
+	if (hashcmp(sha1, entry->sha1))
+		return -1;
+	return 0;
+}
+
+/*
+ * we are going to reuse the existing pack entry data.  make
+ * sure it is not corrupt.
+ */
+static int revalidate_pack_entry(struct object_entry *entry)
+{
+	void *data;
+	char type[20];
+	unsigned long size;
+	struct pack_entry e;
+
+	e.p = entry->in_pack;
+	e.offset = entry->in_pack_offset;
+
+	/* the caller has already called use_packed_git() for us */
+	data = unpack_entry_gently(&e, type, &size);
+	return revalidate_one(entry, data, type, size);
+}
+
+static int revalidate_loose_object(struct object_entry *entry,
+				   unsigned char *map,
+				   unsigned long mapsize)
+{
+	/* we already know this is a loose object with new type header. */
+	void *data;
+	char type[20];
+	unsigned long size;
+	data = unpack_sha1_file(map, mapsize, type, &size);
+	return revalidate_one(entry, data, type, size);
+}
+
 static unsigned long write_object(struct sha1file *f,
 				  struct object_entry *entry)
 {
@@ -276,6 +326,9 @@ static unsigned long write_object(struct
 		map = map_sha1_file(entry->sha1, &mapsize);
 		if (map && !legacy_loose_object(map)) {
 			/* We can copy straight into the pack file */
+			if (revalidate_loose_object(entry, map, mapsize))
+				die("corrupt loose object %s",
+				    sha1_to_hex(entry->sha1));
 			sha1write(f, map, mapsize);
 			munmap(map, mapsize);
 			written++;
@@ -286,7 +339,7 @@ static unsigned long write_object(struct
 			munmap(map, mapsize);
 	}
 
-	if (! to_reuse) {
+	if (!to_reuse) {
 		buf = read_sha1_file(entry->sha1, type, &size);
 		if (!buf)
 			die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -319,6 +372,9 @@ static unsigned long write_object(struct
 
 		datalen = find_packed_object_size(p, entry->in_pack_offset);
 		buf = (char *) p->pack_base + entry->in_pack_offset;
+
+		if (revalidate_pack_entry(entry))
+			die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
 		sha1write(f, buf, datalen);
 		unuse_packed_git(p);
 		hdrlen = 0; /* not really */
@@ -1163,7 +1219,7 @@ static void prepare_pack(int window, int
 		find_deltas(sorted_by_type, window+1, depth);
 }
 
-static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
+static int reuse_cached_pack(unsigned char *sha1)
 {
 	static const char cache[] = "pack-cache/pack-%s.%s";
 	char *cached_pack, *cached_idx;
@@ -1247,7 +1303,7 @@ int cmd_pack_objects(int argc, const cha
 {
 	SHA_CTX ctx;
 	char line[40 + 1 + PATH_MAX + 2];
-	int depth = 10, pack_to_stdout = 0;
+	int depth = 10;
 	struct object_entry **list;
 	int num_preferred_base = 0;
 	int i;
@@ -1367,7 +1423,7 @@ int cmd_pack_objects(int argc, const cha
 	if (progress && (nr_objects != nr_result))
 		fprintf(stderr, "Result has %d objects.\n", nr_result);
 
-	if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
+	if (reuse_cached_pack(object_list_sha1))
 		;
 	else {
 		if (nr_result)
-- 
1.4.2.ga2654



-- 
VGER BF report: U 0.5

  parent reply	other threads:[~2006-09-01 23:14 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <9e4733910608290943g6aa79855q62b98caf4f19510@mail.gmail.com>
     [not found] ` <20060829165811.GB21729@spearce.org>
     [not found]   ` <9e4733910608291037k2d9fb791v18abc19bdddf5e89@mail.gmail.com>
     [not found]     ` <20060829175819.GE21729@spearce.org>
     [not found]       ` <9e4733910608291155g782953bbv5df1b74878f4fcf1@mail.gmail.com>
     [not found]         ` <20060829190548.GK21729@spearce.org>
     [not found]           ` <9e4733910608291252q130fc723r945e6ab906ca6969@mail.gmail.com>
     [not found]             ` <20060829232007.GC22935@spearce.org>
     [not found]               ` <9e4733910608291807q9b896e4sdbfaa9e49de58c2b@mail.gmail.com>
2006-08-30  1:51                 ` Mozilla .git tree Shawn Pearce
2006-08-30  2:25                   ` Shawn Pearce
2006-08-30  2:58                   ` Jon Smirl
2006-08-30  3:10                     ` Shawn Pearce
2006-08-30  3:27                       ` Jon Smirl
2006-08-30  5:53                       ` Nicolas Pitre
2006-08-30 11:42                         ` Junio C Hamano
2006-09-01  7:42                           ` Junio C Hamano
2006-09-02  1:19                             ` Shawn Pearce
2006-09-02  4:01                               ` Junio C Hamano
2006-09-02  4:39                                 ` Shawn Pearce
2006-09-02 11:06                                   ` Junio C Hamano
2006-09-02 14:20                                     ` Jon Smirl
2006-09-02 17:39                                       ` Shawn Pearce
2006-09-02 18:56                                         ` Linus Torvalds
2006-09-02 20:53                                           ` Junio C Hamano
2006-09-02 17:44                                     ` Shawn Pearce
2006-09-02  2:04                             ` Shawn Pearce
2006-09-02 11:02                               ` Junio C Hamano
2006-09-02 17:51                                 ` Shawn Pearce
2006-09-02 20:55                                   ` Junio C Hamano
2006-09-03  3:54                                     ` Shawn Pearce
2006-09-01 17:45                           ` A Large Angry SCM
2006-09-01 18:35                             ` Linus Torvalds
2006-09-01 19:56                               ` Junio C Hamano
2006-09-01 23:14                               ` Junio C Hamano [this message]
2006-09-02  0:23                                 ` [PATCH] pack-objects: re-validate data we copy from elsewhere Linus Torvalds
2006-09-02  1:39                                   ` VGER BF report? Johannes Schindelin
2006-09-02  5:58                                     ` Sam Ravnborg
2006-09-02  1:52                                   ` [PATCH] pack-objects: re-validate data we copy from elsewhere Junio C Hamano
2006-09-02  3:52                                   ` Junio C Hamano
2006-09-02  4:52                                     ` Shawn Pearce
2006-09-02  9:42                                       ` Junio C Hamano
2006-09-02 17:43                                         ` Linus Torvalds
2006-09-02 10:09                                       ` Junio C Hamano
2006-09-02 17:54                                         ` Shawn Pearce
2006-09-03 21:00                                           ` Junio C Hamano
2006-09-04  4:10                                             ` Shawn Pearce
2006-09-04  5:50                                               ` Junio C Hamano
2006-09-04  6:44                                                 ` Shawn Pearce
2006-09-04  7:39                                                   ` Junio C Hamano
2006-09-03  0:27                                         ` Linus Torvalds
2006-09-03  0:32                                           ` Junio C Hamano
2006-09-05  8:12                                           ` Junio C Hamano
2006-09-02 18:43                                     ` Linus Torvalds
2006-09-02 20:56                                       ` Junio C Hamano
2006-09-03 21:48                                       ` Junio C Hamano
2006-09-03 22:00                                         ` Linus Torvalds
2006-09-03 22:16                                           ` Linus Torvalds
2006-09-03 22:34                                           ` Junio C Hamano
2006-09-04  4:06                                             ` Junio C Hamano
2006-09-04 15:19                                               ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vveo741tc.fsf_-_@assigned-by-dhcp.cox.net \
    --to=junkio@cox.net \
    --cc=git@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).