From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/4] fsck: avoid reading every object twice
Date: Fri, 4 Nov 2011 22:47:49 +0700 [thread overview]
Message-ID: <1320421670-518-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1320421670-518-1-git-send-email-pclouds@gmail.com>
During verify_pack() all objects are read for SHA-1 check. Then
fsck_sha1() is called on every object, which read the object again
(fsck_sha1 -> parse_object -> read_sha1_file).
Avoid reading an object twice, do fsck_sha1 while we have an object
uncompressed data in verify_pack.
On git.git, with this patch I got:
$ /usr/bin/time ./git fsck >/dev/null
98.97user 0.90system 1:40.01elapsed 99%CPU (0avgtext+0avgdata 616624maxresident)k
0inputs+0outputs (0major+194186minor)pagefaults 0swaps
Without it:
$ /usr/bin/time ./git fsck >/dev/null
231.23user 2.35system 3:53.82elapsed 99%CPU (0avgtext+0avgdata 636688maxresident)k
0inputs+0outputs (0major+461629minor)pagefaults 0swaps
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
builtin/fsck.c | 42 +++++++++++++++++++++++++-----------------
pack-check.c | 13 ++++++++++---
pack.h | 5 ++++-
3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4ead98d..0603f64 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -282,14 +282,8 @@ static void check_connectivity(void)
}
}
-static int fsck_sha1(const unsigned char *sha1)
+static int fsck_obj(struct object *obj)
{
- struct object *obj = parse_object(sha1);
- if (!obj) {
- errors_found |= ERROR_OBJECT;
- return error("%s: object corrupt or missing",
- sha1_to_hex(sha1));
- }
if (obj->flags & SEEN)
return 0;
obj->flags |= SEEN;
@@ -332,6 +326,29 @@ static int fsck_sha1(const unsigned char *sha1)
return 0;
}
+static int fsck_sha1(const unsigned char *sha1)
+{
+ struct object *obj = parse_object(sha1);
+ if (!obj) {
+ errors_found |= ERROR_OBJECT;
+ return error("%s: object corrupt or missing",
+ sha1_to_hex(sha1));
+ }
+ return fsck_obj(obj);
+}
+
+static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
+ unsigned long size, void *buffer, int *eaten)
+{
+ struct object *obj;
+ obj = parse_object_buffer(sha1, type, size, buffer, eaten);
+ if (!obj) {
+ errors_found |= ERROR_OBJECT;
+ return error("%s: object corrupt or missing", sha1_to_hex(sha1));
+ }
+ return fsck_obj(obj);
+}
+
/*
* This is the sorting chunk size: make it reasonably
* big so that we can sort well..
@@ -627,17 +644,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
prepare_packed_git();
for (p = packed_git; p; p = p->next)
/* verify gives error messages itself */
- if (verify_pack(p))
+ if (verify_pack(p, fsck_obj_buffer))
errors_found |= ERROR_PACK;
-
- for (p = packed_git; p; p = p->next) {
- uint32_t j, num;
- if (open_pack_index(p))
- continue;
- num = p->num_objects;
- for (j = 0; j < num; j++)
- fsck_sha1(nth_packed_object_sha1(p, j));
- }
}
heads = 0;
diff --git a/pack-check.c b/pack-check.c
index e33ea79..372d6b2 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -42,7 +42,8 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
}
static int verify_packfile(struct packed_git *p,
- struct pack_window **w_curs)
+ struct pack_window **w_curs,
+ verify_fn fn)
{
off_t index_size = p->index_size;
const unsigned char *index_base = p->index_data;
@@ -129,6 +130,12 @@ static int verify_packfile(struct packed_git *p,
free(data);
break;
}
+ if (fn) {
+ int eaten = 0;
+ fn(entries[i].sha1, type, size, data, &eaten);
+ if (eaten)
+ data = NULL;
+ }
free(data);
}
free(entries);
@@ -159,7 +166,7 @@ int verify_pack_index(struct packed_git *p)
return err;
}
-int verify_pack(struct packed_git *p)
+int verify_pack(struct packed_git *p, verify_fn fn)
{
int err = 0;
struct pack_window *w_curs = NULL;
@@ -168,7 +175,7 @@ int verify_pack(struct packed_git *p)
if (!p->index_data)
return -1;
- err |= verify_packfile(p, &w_curs);
+ err |= verify_packfile(p, &w_curs, fn);
unuse_pack(&w_curs);
return err;
diff --git a/pack.h b/pack.h
index 722a54e..70f3c29 100644
--- a/pack.h
+++ b/pack.h
@@ -70,10 +70,13 @@ struct pack_idx_entry {
off_t offset;
};
+
+typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*);
+
extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1);
extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
extern int verify_pack_index(struct packed_git *);
-extern int verify_pack(struct packed_git *);
+extern int verify_pack(struct packed_git *, verify_fn fn);
extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
extern char *index_pack_lockfile(int fd);
extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
--
1.7.4.74.g639db
next prev parent reply other threads:[~2011-11-04 15:49 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-04 15:47 [PATCH 0/4] fsck improvements Nguyễn Thái Ngọc Duy
2011-11-04 15:47 ` [PATCH 1/4] fsck: return error code when verify_pack() goes wrong Nguyễn Thái Ngọc Duy
2011-11-04 15:47 ` [PATCH 2/4] Stop verify_packfile() as soon as an error occurs Nguyễn Thái Ngọc Duy
2011-11-04 15:47 ` Nguyễn Thái Ngọc Duy [this message]
2011-11-04 15:47 ` [PATCH 4/4] fsck: print progress Nguyễn Thái Ngọc Duy
2011-11-04 20:14 ` Jeff King
2011-11-05 3:26 ` Nguyen Thai Ngoc Duy
2011-11-04 18:43 ` [PATCH 0/4] fsck improvements Junio C Hamano
2011-11-05 3:18 ` Nguyen Thai Ngoc Duy
2011-11-05 5:26 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1320421670-518-4-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.