From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 08/10] pack-check: do not unpack blobs
Date: Sun, 4 Mar 2012 19:59:54 +0700 [thread overview]
Message-ID: <1330865996-2069-9-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1330865996-2069-1-git-send-email-pclouds@gmail.com>
Blob content is not used by the verify_pack caller (currently only fsck);
we only need to make sure each blob's SHA-1 signature matches its
content. unpack_entry() is taught to hash a pack entry as it is
unpacked, eliminating the need to keep the whole blob in memory.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 2 +-
fast-import.c | 2 +-
pack-check.c | 21 ++++++++++++++++++++-
sha1_file.c | 45 +++++++++++++++++++++++++++++++++++----------
t/t1050-large.sh | 2 +-
5 files changed, 58 insertions(+), 14 deletions(-)
diff --git a/cache.h b/cache.h
index e12b15f..3365f89 100644
--- a/cache.h
+++ b/cache.h
@@ -1062,7 +1062,7 @@ extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t
extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
extern int is_pack_valid(struct packed_git *);
-extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
+extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *, unsigned char *);
extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
diff --git a/fast-import.c b/fast-import.c
index 6cd19e5..5e94a64 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1303,7 +1303,7 @@ static void *gfi_unpack_entry(
*/
p->pack_size = pack_size + 20;
}
- return unpack_entry(p, oe->idx.offset, &type, sizep);
+ return unpack_entry(p, oe->idx.offset, &type, sizep, NULL);
}
static const char *get_mode(const char *str, uint16_t *modep)
diff --git a/pack-check.c b/pack-check.c
index 63a595c..1920bdb 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -105,6 +105,7 @@ static int verify_packfile(struct packed_git *p,
void *data;
enum object_type type;
unsigned long size;
+ off_t curpos = entries[i].offset;
if (p->index_version > 1) {
off_t offset = entries[i].offset;
@@ -116,7 +117,25 @@ static int verify_packfile(struct packed_git *p,
sha1_to_hex(entries[i].sha1),
p->pack_name, (uintmax_t)offset);
}
- data = unpack_entry(p, entries[i].offset, &type, &size);
+ type = unpack_object_header(p, w_curs, &curpos, &size);
+ unuse_pack(w_curs);
+ if (type == OBJ_BLOB) {
+ unsigned char sha1[20];
+ data = unpack_entry(p, entries[i].offset, &type, &size, sha1);
+ if (!data) {
+ if (hashcmp(entries[i].sha1, sha1))
+ err = error("packed %s from %s is corrupt",
+ sha1_to_hex(entries[i].sha1), p->pack_name);
+ else if (fn) {
+ int eaten = 0;
+ fn(entries[i].sha1, type, size, NULL, &eaten);
+ }
+ if (((base_count + i) & 1023) == 0)
+ display_progress(progress, base_count + i);
+ continue;
+ }
+ }
+ data = unpack_entry(p, entries[i].offset, &type, &size, NULL);
if (!data)
err = error("cannot unpack %s from %s at offset %"PRIuMAX"",
sha1_to_hex(entries[i].sha1), p->pack_name,
diff --git a/sha1_file.c b/sha1_file.c
index a77ef0a..d68a5b0 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1653,28 +1653,51 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
}
static void *unpack_compressed_entry(struct packed_git *p,
- struct pack_window **w_curs,
- off_t curpos,
- unsigned long size)
+ struct pack_window **w_curs,
+ off_t curpos,
+ unsigned long size,
+ enum object_type type,
+ unsigned char *sha1)
{
+ static unsigned char fixed_buf[8192];
int st;
git_zstream stream;
unsigned char *buffer, *in;
+ git_SHA_CTX c;
+
+ if (sha1) { /* do hash_sha1_file internally */
+ char hdr[32];
+ int hdrlen = sprintf(hdr, "%s %lu", typename(type), size)+1;
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, hdr, hdrlen);
+
+ buffer = fixed_buf;
+ } else {
+ buffer = xmallocz(size);
+ }
- buffer = xmallocz(size);
memset(&stream, 0, sizeof(stream));
stream.next_out = buffer;
- stream.avail_out = size + 1;
+ stream.avail_out = buffer == fixed_buf ? sizeof(fixed_buf) : size + 1;
git_inflate_init(&stream);
do {
in = use_pack(p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
st = git_inflate(&stream, Z_FINISH);
- if (!stream.avail_out)
+ if (sha1) {
+ git_SHA1_Update(&c, buffer, stream.next_out - (unsigned char *)buffer);
+ stream.next_out = buffer;
+ stream.avail_out = sizeof(fixed_buf);
+ }
+ else if (!stream.avail_out)
break; /* the payload is larger than it should be */
curpos += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR);
+ if (sha1) {
+ git_SHA1_Final(sha1, &c);
+ buffer = NULL;
+ }
git_inflate_end(&stream);
if ((st != Z_STREAM_END) || stream.total_out != size) {
free(buffer);
@@ -1727,7 +1750,7 @@ static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
ret = ent->data;
if (!ret || ent->p != p || ent->base_offset != base_offset)
- return unpack_entry(p, base_offset, type, base_size);
+ return unpack_entry(p, base_offset, type, base_size, NULL);
if (!keep_cache) {
ent->data = NULL;
@@ -1844,7 +1867,7 @@ static void *unpack_delta_entry(struct packed_git *p,
return NULL;
}
- delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
+ delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size, OBJ_NONE, NULL);
if (!delta_data) {
error("failed to unpack compressed delta "
"at offset %"PRIuMAX" from %s",
@@ -1883,7 +1906,8 @@ static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
int do_check_packed_object_crc;
void *unpack_entry(struct packed_git *p, off_t obj_offset,
- enum object_type *type, unsigned long *sizep)
+ enum object_type *type, unsigned long *sizep,
+ unsigned char *sha1)
{
struct pack_window *w_curs = NULL;
off_t curpos = obj_offset;
@@ -1917,7 +1941,8 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
- data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
+ data = unpack_compressed_entry(p, &w_curs, curpos,
+ *sizep, *type, sha1);
break;
default:
data = NULL;
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 7e78c72..c749ecb 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -141,7 +141,7 @@ test_expect_success 'fetch updates' '
)
'
-test_expect_failure 'fsck' '
+test_expect_success 'fsck' '
git fsck --full
'
--
1.7.8.36.g69ee2
next prev parent reply other threads:[~2012-03-04 13:03 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-27 7:55 [PATCH 00/11] Large blob fixes Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 01/11] Add more large blob test cases Nguyễn Thái Ngọc Duy
2012-02-27 20:18 ` Peter Baumann
2012-02-27 7:55 ` [PATCH 02/11] Factor out and export large blob writing code to arbitrary file handle Nguyễn Thái Ngọc Duy
2012-02-27 17:29 ` Junio C Hamano
2012-02-27 21:50 ` Junio C Hamano
2012-02-27 7:55 ` [PATCH 03/11] cat-file: use streaming interface to print blobs Nguyễn Thái Ngọc Duy
2012-02-27 17:44 ` Junio C Hamano
2012-02-28 1:08 ` Nguyen Thai Ngoc Duy
2012-02-27 7:55 ` [PATCH 04/11] parse_object: special code path for blobs to avoid putting whole object in memory Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 05/11] show: use streaming interface for showing blobs Nguyễn Thái Ngọc Duy
2012-02-27 18:00 ` Junio C Hamano
2012-02-27 7:55 ` [PATCH 06/11] index-pack --verify: skip sha-1 collision test Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 07/11] index-pack: split second pass obj handling into own function Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 08/11] index-pack: reduce memory usage when the pack has large blobs Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 09/11] pack-check: do not unpack blobs Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 10/11] archive: support streaming large files to a tar archive Nguyễn Thái Ngọc Duy
2012-02-27 7:55 ` [PATCH 11/11] fsck: use streaming interface for writing lost-found blobs Nguyễn Thái Ngọc Duy
2012-02-27 18:43 ` [PATCH 00/11] Large blob fixes Junio C Hamano
2012-02-28 1:23 ` Nguyen Thai Ngoc Duy
2012-03-04 12:59 ` [PATCH v2 00/10] " Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 01/10] Add more large blob test cases Nguyễn Thái Ngọc Duy
2012-03-06 0:59 ` Junio C Hamano
2012-03-04 12:59 ` [PATCH v2 02/10] streaming: make streaming-write-entry to be more reusable Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 03/10] cat-file: use streaming interface to print blobs Nguyễn Thái Ngọc Duy
2012-03-04 23:12 ` Junio C Hamano
2012-03-05 2:42 ` Nguyen Thai Ngoc Duy
2012-03-04 12:59 ` [PATCH v2 04/10] parse_object: special code path for blobs to avoid putting whole object in memory Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 05/10] show: use streaming interface for showing blobs Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 06/10] index-pack: split second pass obj handling into own function Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 07/10] index-pack: reduce memory usage when the pack has large blobs Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` Nguyễn Thái Ngọc Duy [this message]
2012-03-04 12:59 ` [PATCH v2 09/10] archive: support streaming large files to a tar archive Nguyễn Thái Ngọc Duy
2012-03-04 12:59 ` [PATCH v2 10/10] fsck: use streaming interface for writing lost-found blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 00/11] Large blob fixes Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 01/11] Add more large blob test cases Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 02/11] streaming: make streaming-write-entry to be more reusable Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 03/11] cat-file: use streaming interface to print blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 04/11] parse_object: special code path for blobs to avoid putting whole object in memory Nguyễn Thái Ngọc Duy
2012-03-06 0:57 ` Junio C Hamano
2012-03-05 3:43 ` [PATCH v3 05/11] show: use streaming interface for showing blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 06/11] index-pack: split second pass obj handling into own function Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 07/11] index-pack: reduce memory usage when the pack has large blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 08/11] pack-check: do not unpack blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 09/11] archive: support streaming large files to a tar archive Nguyễn Thái Ngọc Duy
2012-03-06 0:57 ` Junio C Hamano
2012-03-05 3:43 ` [PATCH v3 10/11] fsck: use streaming interface for writing lost-found blobs Nguyễn Thái Ngọc Duy
2012-03-05 3:43 ` [PATCH v3 11/11] update-server-info: respect core.bigfilethreshold Nguyễn Thái Ngọc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1330865996-2069-9-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).