* [PATCH 3/4] fsck: avoid reading every object twice
2011-11-04 15:47 [PATCH 0/4] fsck improvements Nguyễn Thái Ngọc Duy
2011-11-04 15:47 ` [PATCH 1/4] fsck: return error code when verify_pack() goes wrong Nguyễn Thái Ngọc Duy
2011-11-04 15:47 ` [PATCH 2/4] Stop verify_packfile() as soon as an error occurs Nguyễn Thái Ngọc Duy
@ 2011-11-04 15:47 ` Nguyễn Thái Ngọc Duy
2011-11-04 15:47 ` [PATCH 4/4] fsck: print progress Nguyễn Thái Ngọc Duy
2011-11-04 18:43 ` [PATCH 0/4] fsck improvements Junio C Hamano
4 siblings, 0 replies; 10+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2011-11-04 15:47 UTC (permalink / raw)
To: git; +Cc: Nguyễn Thái Ngọc Duy
During verify_pack(), all objects are read for the SHA-1 check. Then
fsck_sha1() is called on every object, which reads the object again
(fsck_sha1 -> parse_object -> read_sha1_file).
Avoid reading an object twice: run the fsck checks while we still have
the object's uncompressed data in verify_pack.
On git.git, with this patch I got:
$ /usr/bin/time ./git fsck >/dev/null
98.97user 0.90system 1:40.01elapsed 99%CPU (0avgtext+0avgdata 616624maxresident)k
0inputs+0outputs (0major+194186minor)pagefaults 0swaps
Without it:
$ /usr/bin/time ./git fsck >/dev/null
231.23user 2.35system 3:53.82elapsed 99%CPU (0avgtext+0avgdata 636688maxresident)k
0inputs+0outputs (0major+461629minor)pagefaults 0swaps
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
builtin/fsck.c | 42 +++++++++++++++++++++++++-----------------
pack-check.c | 13 ++++++++++---
pack.h | 5 ++++-
3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4ead98d..0603f64 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -282,14 +282,8 @@ static void check_connectivity(void)
}
}
-static int fsck_sha1(const unsigned char *sha1)
+static int fsck_obj(struct object *obj)
{
- struct object *obj = parse_object(sha1);
- if (!obj) {
- errors_found |= ERROR_OBJECT;
- return error("%s: object corrupt or missing",
- sha1_to_hex(sha1));
- }
if (obj->flags & SEEN)
return 0;
obj->flags |= SEEN;
@@ -332,6 +326,29 @@ static int fsck_sha1(const unsigned char *sha1)
return 0;
}
+static int fsck_sha1(const unsigned char *sha1)
+{
+ struct object *obj = parse_object(sha1);
+ if (!obj) {
+ errors_found |= ERROR_OBJECT;
+ return error("%s: object corrupt or missing",
+ sha1_to_hex(sha1));
+ }
+ return fsck_obj(obj);
+}
+
+static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
+ unsigned long size, void *buffer, int *eaten)
+{
+ struct object *obj;
+ obj = parse_object_buffer(sha1, type, size, buffer, eaten);
+ if (!obj) {
+ errors_found |= ERROR_OBJECT;
+ return error("%s: object corrupt or missing", sha1_to_hex(sha1));
+ }
+ return fsck_obj(obj);
+}
+
/*
* This is the sorting chunk size: make it reasonably
* big so that we can sort well..
@@ -627,17 +644,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
prepare_packed_git();
for (p = packed_git; p; p = p->next)
/* verify gives error messages itself */
- if (verify_pack(p))
+ if (verify_pack(p, fsck_obj_buffer))
errors_found |= ERROR_PACK;
-
- for (p = packed_git; p; p = p->next) {
- uint32_t j, num;
- if (open_pack_index(p))
- continue;
- num = p->num_objects;
- for (j = 0; j < num; j++)
- fsck_sha1(nth_packed_object_sha1(p, j));
- }
}
heads = 0;
diff --git a/pack-check.c b/pack-check.c
index e33ea79..372d6b2 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -42,7 +42,8 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
}
static int verify_packfile(struct packed_git *p,
- struct pack_window **w_curs)
+ struct pack_window **w_curs,
+ verify_fn fn)
{
off_t index_size = p->index_size;
const unsigned char *index_base = p->index_data;
@@ -129,6 +130,12 @@ static int verify_packfile(struct packed_git *p,
free(data);
break;
}
+ if (fn) {
+ int eaten = 0;
+ fn(entries[i].sha1, type, size, data, &eaten);
+ if (eaten)
+ data = NULL;
+ }
free(data);
}
free(entries);
@@ -159,7 +166,7 @@ int verify_pack_index(struct packed_git *p)
return err;
}
-int verify_pack(struct packed_git *p)
+int verify_pack(struct packed_git *p, verify_fn fn)
{
int err = 0;
struct pack_window *w_curs = NULL;
@@ -168,7 +175,7 @@ int verify_pack(struct packed_git *p)
if (!p->index_data)
return -1;
- err |= verify_packfile(p, &w_curs);
+ err |= verify_packfile(p, &w_curs, fn);
unuse_pack(&w_curs);
return err;
diff --git a/pack.h b/pack.h
index 722a54e..70f3c29 100644
--- a/pack.h
+++ b/pack.h
@@ -70,10 +70,13 @@ struct pack_idx_entry {
off_t offset;
};
+
+typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*);
+
extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1);
extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
extern int verify_pack_index(struct packed_git *);
-extern int verify_pack(struct packed_git *);
+extern int verify_pack(struct packed_git *, verify_fn fn);
extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
extern char *index_pack_lockfile(int fd);
extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
--
1.7.4.74.g639db
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/4] fsck: print progress
2011-11-04 15:47 [PATCH 0/4] fsck improvements Nguyễn Thái Ngọc Duy
` (2 preceding siblings ...)
2011-11-04 15:47 ` [PATCH 3/4] fsck: avoid reading every object twice Nguyễn Thái Ngọc Duy
@ 2011-11-04 15:47 ` Nguyễn Thái Ngọc Duy
2011-11-04 20:14 ` Jeff King
2011-11-04 18:43 ` [PATCH 0/4] fsck improvements Junio C Hamano
4 siblings, 1 reply; 10+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2011-11-04 15:47 UTC (permalink / raw)
To: git; +Cc: Nguyễn Thái Ngọc Duy
fsck is usually a long process, and it would be nice if it printed
progress from time to time.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
Documentation/git-fsck.txt | 12 +++++++++++-
builtin/fsck.c | 40 ++++++++++++++++++++++++++++++++++++++--
pack-check.c | 14 +++++++++++---
pack.h | 3 ++-
4 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/Documentation/git-fsck.txt b/Documentation/git-fsck.txt
index a2a508d..5245101 100644
--- a/Documentation/git-fsck.txt
+++ b/Documentation/git-fsck.txt
@@ -10,7 +10,8 @@ SYNOPSIS
--------
[verse]
'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]
- [--[no-]full] [--strict] [--verbose] [--lost-found] [<object>*]
+ [--[no-]full] [--strict] [--verbose] [--lost-found]
+ [--[no-]progress] [<object>*]
DESCRIPTION
-----------
@@ -72,6 +73,15 @@ index file, all SHA1 references in .git/refs/*, and all reflogs (unless
a blob, the contents are written into the file, rather than
its object name.
+--progress::
+--no-progress::
+ When fsck is run in a terminal, it will show the progress.
+ These options can force progress to be shown or not
+ regardless of the terminal check.
++
+Progress is not shown when --verbose is used. --progress is ignored
+in this case.
+
It tests SHA1 and general object sanity, and it does full tracking of
the resulting reachability and everything else. It prints out any
corruption it finds (missing or bad objects), and if you use the
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 0603f64..c4b1ca6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -11,6 +11,7 @@
#include "fsck.h"
#include "parse-options.h"
#include "dir.h"
+#include "progress.h"
#define REACHABLE 0x0001
#define SEEN 0x0002
@@ -27,6 +28,7 @@ static const char *head_points_at;
static int errors_found;
static int write_lost_and_found;
static int verbose;
+static int show_progress = -1;
#define ERROR_OBJECT 01
#define ERROR_REACHABLE 02
#define ERROR_PACK 04
@@ -138,7 +140,11 @@ static int traverse_one_object(struct object *obj)
static int traverse_reachable(void)
{
+ struct progress *progress = NULL;
+ unsigned int nr = 0;
int result = 0;
+ if (show_progress)
+ progress = start_progress_delay("Checking connectivity", 0, 0, 2);
while (pending.nr) {
struct object_array_entry *entry;
struct object *obj;
@@ -146,7 +152,9 @@ static int traverse_reachable(void)
entry = pending.objects + --pending.nr;
obj = entry->item;
result |= traverse_one_object(obj);
+ display_progress(progress, ++nr);
}
+ stop_progress(&progress);
return !!result;
}
@@ -530,15 +538,20 @@ static void get_default_heads(void)
static void fsck_object_dir(const char *path)
{
int i;
+ struct progress *progress = NULL;
if (verbose)
fprintf(stderr, "Checking object directory\n");
+ if (show_progress)
+ progress = start_progress("Checking object directories", 256);
for (i = 0; i < 256; i++) {
static char dir[4096];
sprintf(dir, "%s/%02x", path, i);
fsck_dir(i, dir);
+ display_progress(progress, i+1);
}
+ stop_progress(&progress);
fsck_sha1_list();
}
@@ -609,6 +622,7 @@ static struct option fsck_opts[] = {
OPT_BOOLEAN(0, "strict", &check_strict, "enable more strict checking"),
OPT_BOOLEAN(0, "lost-found", &write_lost_and_found,
"write dangling objects in .git/lost-found"),
+ OPT_BOOL (0, "progress", &show_progress, "show progress"),
OPT_END(),
};
@@ -621,6 +635,12 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
read_replace_refs = 0;
argc = parse_options(argc, argv, prefix, fsck_opts, fsck_usage, 0);
+
+ if (show_progress == -1)
+ show_progress = isatty(2);
+ if (verbose)
+ show_progress = 0;
+
if (write_lost_and_found) {
check_full = 1;
include_reflogs = 0;
@@ -640,12 +660,28 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
if (check_full) {
struct packed_git *p;
+ uint32_t total = 0, count = 0;
+ struct progress *progress = NULL;
prepare_packed_git();
- for (p = packed_git; p; p = p->next)
+
+ if (show_progress) {
+ for (p = packed_git; p; p = p->next) {
+ if (open_pack_index(p))
+ continue;
+ total += p->num_objects;
+ }
+
+ progress = start_progress("Checking objects", total);
+ }
+ for (p = packed_git; p; p = p->next) {
/* verify gives error messages itself */
- if (verify_pack(p, fsck_obj_buffer))
+ if (verify_pack(p, fsck_obj_buffer,
+ progress, count))
errors_found |= ERROR_PACK;
+ count += p->num_objects;
+ }
+ stop_progress(&progress);
}
heads = 0;
diff --git a/pack-check.c b/pack-check.c
index 372d6b2..a3262af 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "pack.h"
#include "pack-revindex.h"
+#include "progress.h"
struct idx_entry {
off_t offset;
@@ -43,7 +44,9 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
static int verify_packfile(struct packed_git *p,
struct pack_window **w_curs,
- verify_fn fn)
+ verify_fn fn,
+ struct progress *progress, uint32_t base_count)
+
{
off_t index_size = p->index_size;
const unsigned char *index_base = p->index_data;
@@ -136,8 +139,12 @@ static int verify_packfile(struct packed_git *p,
if (eaten)
data = NULL;
}
+ if (((base_count + i) & 1023) == 0)
+ display_progress(progress, base_count + i);
free(data);
+
}
+ display_progress(progress, base_count + i);
free(entries);
return err;
@@ -166,7 +173,8 @@ int verify_pack_index(struct packed_git *p)
return err;
}
-int verify_pack(struct packed_git *p, verify_fn fn)
+int verify_pack(struct packed_git *p, verify_fn fn,
+ struct progress *progress, uint32_t base_count)
{
int err = 0;
struct pack_window *w_curs = NULL;
@@ -175,7 +183,7 @@ int verify_pack(struct packed_git *p, verify_fn fn)
if (!p->index_data)
return -1;
- err |= verify_packfile(p, &w_curs, fn);
+ err |= verify_packfile(p, &w_curs, fn, progress, base_count);
unuse_pack(&w_curs);
return err;
diff --git a/pack.h b/pack.h
index 70f3c29..324a1d7 100644
--- a/pack.h
+++ b/pack.h
@@ -71,12 +71,13 @@ struct pack_idx_entry {
};
+struct progress;
typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*);
extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1);
extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
extern int verify_pack_index(struct packed_git *);
-extern int verify_pack(struct packed_git *, verify_fn fn);
+extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t);
extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
extern char *index_pack_lockfile(int fd);
extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
--
1.7.4.74.g639db
^ permalink raw reply related [flat|nested] 10+ messages in thread