From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 4/6] index-pack: use streaming interface for collision test on large blobs
Date: Wed, 23 May 2012 21:09:49 +0700 [thread overview]
Message-ID: <1337782191-10091-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1337782191-10091-1-git-send-email-pclouds@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
the use of sha1_object_info() instead of has_sha1_file() is the cause
of 6/6 as it is called on non-existent objects too.
builtin/index-pack.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
t/t5300-pack-object.sh | 5 +++
2 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9129299..05b1d35 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -9,6 +9,7 @@
#include "progress.h"
#include "fsck.h"
#include "exec_cmd.h"
+#include "streaming.h"
#include "thread-utils.h"
static const char index_pack_usage[] =
@@ -621,21 +622,94 @@ static void find_delta_children(const union delta_base *base,
*last_index = last;
}
+struct compare_data {
+ struct object_entry *entry;
+ struct git_istream *st;
+ unsigned char *buf;
+ unsigned long buf_size;
+};
+
+static int compare_objects(const unsigned char *buf, unsigned long size,
+ void *cb_data)
+{
+ struct compare_data *data = cb_data;
+
+ if (data->buf_size < size) {
+ free(data->buf);
+ data->buf = xmalloc(size);
+ data->buf_size = size;
+ }
+
+ while (size) {
+ ssize_t len = read_istream(data->st, data->buf, size);
+ if (len == 0)
+ die(_("SHA1 COLLISION FOUND WITH %s !"),
+ sha1_to_hex(data->entry->idx.sha1));
+ if (len < 0)
+ die(_("unable to read %s"),
+ sha1_to_hex(data->entry->idx.sha1));
+ if (memcmp(buf, data->buf, len))
+ die(_("SHA1 COLLISION FOUND WITH %s !"),
+ sha1_to_hex(data->entry->idx.sha1));
+ size -= len;
+ buf += len;
+ }
+ return 0;
+}
+
+static int check_collison(struct object_entry *entry)
+{
+ struct compare_data data;
+ enum object_type type;
+ unsigned long size;
+
+ if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
+ return -1;
+
+ memset(&data, 0, sizeof(data));
+ data.entry = entry;
+ data.st = open_istream(entry->idx.sha1, &type, &size, NULL);
+ if (!data.st)
+ return -1;
+ if (size != entry->size || type != entry->type)
+ die(_("SHA1 COLLISION FOUND WITH %s !"),
+ sha1_to_hex(entry->idx.sha1));
+ unpack_data(entry, compare_objects, &data);
+ close_istream(data.st);
+ free(data.buf);
+ return 0;
+}
+
static void sha1_object(const void *data, struct object_entry *obj_entry,
unsigned long size, enum object_type type,
const unsigned char *sha1)
{
void *new_data = NULL;
+ int collision_test_needed = 1;
+ enum object_type has_type;
+ unsigned long has_size;
assert(data || obj_entry);
read_lock();
- if (has_sha1_file(sha1)) {
+ has_type = sha1_object_info(sha1, &has_size);
+ read_unlock();
+
+ if (has_type < 0)
+ collision_test_needed = 0;
+ if (collision_test_needed && !data) {
+ read_lock();
+ if (!check_collison(obj_entry))
+ collision_test_needed = 0;
+ read_unlock();
+ }
+ if (collision_test_needed) {
void *has_data;
- enum object_type has_type;
- unsigned long has_size;
+ if (has_type != type || has_size != size)
+ die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
if (!data)
data = new_data = get_data_from_pack(obj_entry);
+ read_lock();
has_data = read_sha1_file(sha1, &has_type, &has_size);
read_unlock();
if (!has_data)
@@ -644,8 +718,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
memcmp(data, has_data, size) != 0)
die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
free(has_data);
- } else
- read_unlock();
+ }
if (strict) {
read_lock();
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index d9d856b..300ed91 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -418,4 +418,9 @@ test_expect_success \
'test_must_fail git index-pack -o bad.idx test-3.pack 2>msg &&
grep "SHA1 COLLISION FOUND" msg'
+test_expect_success \
+ 'make sure index-pack detects the SHA1 collision (large blobs)' \
+ 'test_must_fail git -c core.bigfilethreshold=1 index-pack -o bad.idx test-3.pack 2>msg &&
+ grep "SHA1 COLLISION FOUND" msg'
+
test_done
--
1.7.10.2.549.g9354186
next prev parent reply other threads:[~2012-05-23 14:14 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-16 12:50 [PATCH 1/2] index-pack: hash non-delta objects while reading from stream Nguyễn Thái Ngọc Duy
2012-05-16 12:50 ` [PATCH 2/2] index-pack: use streaming interface on large blobs (most of the time) Nguyễn Thái Ngọc Duy
2012-05-18 22:20 ` Junio C Hamano
2012-05-19 5:31 ` Nguyen Thai Ngoc Duy
2012-05-18 22:15 ` [PATCH 1/2] index-pack: hash non-delta objects while reading from stream Junio C Hamano
2012-05-23 14:09 ` [PATCH v2 1/6] " Nguyễn Thái Ngọc Duy
2012-05-23 14:09 ` [PATCH v2 2/6] index-pack: use streaming interface on large blobs (most of the time) Nguyễn Thái Ngọc Duy
2012-05-23 14:09 ` [PATCH v2 3/6] index-pack: factor out unpack core from get_data_from_pack Nguyễn Thái Ngọc Duy
2012-05-23 14:09 ` Nguyễn Thái Ngọc Duy [this message]
2012-05-23 16:03 ` [PATCH v2 4/6] index-pack: use streaming interface for collision test on large blobs Junio C Hamano
2012-05-24 13:55 ` [PATCH v2.1 " Nguyễn Thái Ngọc Duy
2012-05-23 14:09 ` [PATCH v2 5/6] index-pack: avoid collision test when verifying in-repo pack Nguyễn Thái Ngọc Duy
2012-05-23 14:09 ` [PATCH v2 6/6] sha1_loose_object_info: do not complain out loud on non-existent objects Nguyễn Thái Ngọc Duy
2012-05-23 14:24 ` Nguyen Thai Ngoc Duy
2012-05-23 16:01 ` Junio C Hamano
2012-05-24 13:12 ` Nguyen Thai Ngoc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1337782191-10091-4-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.