* [GSoC][PATCH v5 01/12] fsck: rename "fsck_options" to "fsck_objects_options"
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
@ 2024-06-27 15:12 ` shejialuo
2024-06-27 21:32 ` Junio C Hamano
2024-06-27 15:13 ` [GSoC][PATCH v5 02/12] fsck: use "fsck_configs" to set up configs shejialuo
` (11 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:12 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
git-fsck(1) focuses on checking the consistency of the object database.
It relies on "fsck_options" to interact with fsck error levels. However,
"fsck_options" is aimed at checking the object database and contains a
lot of fields that are only related to the object database.
In order to provide git-fsck(1) with a refs consistency check, rename
"fsck_options" to "fsck_objects_options" to explicitly indicate that
this structure is used to check the object database.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 13 ++++----
builtin/index-pack.c | 18 +++++------
builtin/mktag.c | 13 ++++----
builtin/unpack-objects.c | 14 ++++-----
fetch-pack.c | 12 ++++----
fsck.c | 64 ++++++++++++++++++++++------------------
fsck.h | 43 ++++++++++++++-------------
object-file.c | 4 +--
8 files changed, 95 insertions(+), 86 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..ec3220880d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -42,8 +42,8 @@ static int check_full = 1;
static int connectivity_only;
static int check_strict;
static int keep_cache_objects;
-static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
-static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;
+static struct fsck_objects_options fsck_walk_options = FSCK_OBJECTS_OPTIONS_DEFAULT;
+static struct fsck_objects_options fsck_obj_options = FSCK_OBJECTS_OPTIONS_DEFAULT;
static int errors_found;
static int write_lost_and_found;
static int verbose;
@@ -89,7 +89,7 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
+static int fsck_error_func(struct fsck_objects_options *o UNUSED,
const struct object_id *oid,
enum object_type object_type,
enum fsck_msg_type msg_type,
@@ -118,7 +118,7 @@ static int fsck_error_func(struct fsck_options *o UNUSED,
static struct object_array pending;
static int mark_object(struct object *obj, enum object_type type,
- void *data, struct fsck_options *options UNUSED)
+ void *data, struct fsck_objects_options *options UNUSED)
{
struct object *parent = data;
@@ -204,7 +204,8 @@ static int traverse_reachable(void)
}
static int mark_used(struct object *obj, enum object_type type UNUSED,
- void *data UNUSED, struct fsck_options *options UNUSED)
+ void *data UNUSED,
+ struct fsck_objects_options *options UNUSED)
{
if (!obj)
return 1;
@@ -214,7 +215,7 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
static void mark_unreachable_referents(const struct object_id *oid)
{
- struct fsck_options options = FSCK_OPTIONS_DEFAULT;
+ struct fsck_objects_options options = FSCK_OBJECTS_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 856428fef9..08ebeedfd3 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -127,7 +127,7 @@ static int nr_threads;
static int from_stdin;
static int strict;
static int do_fsck_object;
-static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES;
+static struct fsck_objects_options fsck_objects_options = FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES;
static int verbose;
static const char *progress_title;
static int show_resolving_progress;
@@ -220,7 +220,7 @@ static void cleanup_thread(void)
static int mark_link(struct object *obj, enum object_type type,
void *data UNUSED,
- struct fsck_options *options UNUSED)
+ struct fsck_objects_options *options UNUSED)
{
if (!obj)
return -1;
@@ -852,7 +852,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
else
die(_("invalid blob object %s"), oid_to_hex(oid));
if (do_fsck_object &&
- fsck_object(&blob->object, (void *)data, size, &fsck_options))
+ fsck_object(&blob->object, (void *)data, size, &fsck_objects_options))
die(_("fsck error in packed object"));
} else {
struct object *obj;
@@ -871,9 +871,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
if (!obj)
die(_("invalid %s"), type_name(type));
if (do_fsck_object &&
- fsck_object(obj, buf, size, &fsck_options))
+ fsck_object(obj, buf, size, &fsck_objects_options))
die(_("fsck error in packed object"));
- if (strict && fsck_walk(obj, NULL, &fsck_options))
+ if (strict && fsck_walk(obj, NULL, &fsck_objects_options))
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
if (obj->type == OBJ_TREE) {
@@ -1746,7 +1746,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
usage(index_pack_usage);
disable_replace_refs();
- fsck_options.walk = mark_link;
+ fsck_objects_options.walk = mark_link;
reset_pack_idx_option(&opts);
opts.flags |= WRITE_REV;
@@ -1770,13 +1770,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
} else if (skip_to_optional_arg(arg, "--strict", &arg)) {
strict = 1;
do_fsck_object = 1;
- fsck_set_msg_types(&fsck_options, arg);
+ fsck_set_msg_types(&fsck_objects_options, arg);
} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
strict = 1;
check_self_contained_and_connected = 1;
} else if (skip_to_optional_arg(arg, "--fsck-objects", &arg)) {
do_fsck_object = 1;
- fsck_set_msg_types(&fsck_options, arg);
+ fsck_set_msg_types(&fsck_objects_options, arg);
} else if (!strcmp(arg, "--verify")) {
verify = 1;
} else if (!strcmp(arg, "--verify-stat")) {
@@ -1943,7 +1943,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
else
close(input_fd);
- if (do_fsck_object && fsck_finish(&fsck_options))
+ if (do_fsck_object && fsck_finish(&fsck_objects_options))
die(_("fsck error in pack objects"));
free(opts.anomaly);
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..be2abc71d8 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -15,9 +15,9 @@ static char const * const builtin_mktag_usage[] = {
};
static int option_strict = 1;
-static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
+static struct fsck_objects_options fsck_objects_options = FSCK_OBJECTS_OPTIONS_STRICT;
-static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
+static int mktag_fsck_error_func(struct fsck_objects_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
enum fsck_msg_type msg_type,
@@ -91,12 +91,13 @@ int cmd_mktag(int argc, const char **argv, const char *prefix)
if (strbuf_read(&buf, 0, 0) < 0)
die_errno(_("could not read from stdin"));
- fsck_options.error_func = mktag_fsck_error_func;
- fsck_set_msg_type_from_ids(&fsck_options, FSCK_MSG_EXTRA_HEADER_ENTRY,
+ fsck_objects_options.error_func = mktag_fsck_error_func;
+ fsck_set_msg_type_from_ids(&fsck_objects_options,
+ FSCK_MSG_EXTRA_HEADER_ENTRY,
FSCK_WARN);
/* config might set fsck.extraHeaderEntry=* again */
- git_config(git_fsck_config, &fsck_options);
- if (fsck_tag_standalone(NULL, buf.buf, buf.len, &fsck_options,
+ git_config(git_fsck_config, &fsck_objects_options);
+ if (fsck_tag_standalone(NULL, buf.buf, buf.len, &fsck_objects_options,
&tagged_oid, &tagged_type))
die(_("tag on stdin did not pass our strict fsck check"));
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index f1c85a00ae..c59e330db9 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -25,7 +25,7 @@ static unsigned int offset, len;
static off_t consumed_bytes;
static off_t max_input_size;
static git_hash_ctx ctx;
-static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
+static struct fsck_objects_options fsck_objects_options = FSCK_OBJECTS_OPTIONS_STRICT;
static struct progress *progress;
/*
@@ -212,7 +212,7 @@ static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
*/
static int check_object(struct object *obj, enum object_type type,
void *data UNUSED,
- struct fsck_options *options UNUSED)
+ struct fsck_objects_options *options UNUSED)
{
struct obj_buffer *obj_buf;
@@ -237,10 +237,10 @@ static int check_object(struct object *obj, enum object_type type,
obj_buf = lookup_object_buffer(obj);
if (!obj_buf)
die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
- if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
+ if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_objects_options))
die("fsck error in packed object");
- fsck_options.walk = check_object;
- if (fsck_walk(obj, NULL, &fsck_options))
+ fsck_objects_options.walk = check_object;
+ if (fsck_walk(obj, NULL, &fsck_objects_options))
die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
write_cached_object(obj, obj_buf);
return 0;
@@ -635,7 +635,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
}
if (skip_prefix(arg, "--strict=", &arg)) {
strict = 1;
- fsck_set_msg_types(&fsck_options, arg);
+ fsck_set_msg_types(&fsck_objects_options, arg);
continue;
}
if (starts_with(arg, "--pack_header=")) {
@@ -671,7 +671,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
the_hash_algo->final_oid_fn(&oid, &tmp_ctx);
if (strict) {
write_rest();
- if (fsck_finish(&fsck_options))
+ if (fsck_finish(&fsck_objects_options))
die(_("fsck error in pack objects"));
}
if (!hasheq(fill(the_hash_algo->rawsz), oid.hash))
diff --git a/fetch-pack.c b/fetch-pack.c
index eba9e420ea..7d1e4a5087 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -46,7 +46,7 @@ static int server_supports_filtering;
static int advertise_sid;
static struct shallow_lock shallow_lock;
static const char *alternate_shallow_file;
-static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES;
+static struct fsck_objects_options fsck_objects_options = FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES;
static struct strbuf fsck_msg_types = STRBUF_INIT;
static struct string_list uri_protocols = STRING_LIST_INIT_DUP;
@@ -1220,9 +1220,9 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
} else
alternate_shallow_file = NULL;
if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought,
- &fsck_options.gitmodules_found))
+ &fsck_objects_options.gitmodules_found))
die(_("git fetch-pack: fetch failed."));
- if (fsck_finish(&fsck_options))
+ if (fsck_finish(&fsck_objects_options))
die("fsck failed");
all_done:
@@ -1780,7 +1780,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
if (get_pack(args, fd, pack_lockfiles,
packfile_uris.nr ? &index_pack_args : NULL,
- sought, nr_sought, &fsck_options.gitmodules_found))
+ sought, nr_sought, &fsck_objects_options.gitmodules_found))
die(_("git fetch-pack: fetch failed."));
do_check_stateless_delimiter(args->stateless_rpc, &reader);
@@ -1823,7 +1823,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
packname[the_hash_algo->hexsz] = '\0';
- parse_gitmodules_oids(cmd.out, &fsck_options.gitmodules_found);
+ parse_gitmodules_oids(cmd.out, &fsck_objects_options.gitmodules_found);
close(cmd.out);
@@ -1844,7 +1844,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
string_list_clear(&packfile_uris, 0);
strvec_clear(&index_pack_args);
- if (fsck_finish(&fsck_options))
+ if (fsck_finish(&fsck_objects_options))
die("fsck failed");
if (negotiator)
diff --git a/fsck.c b/fsck.c
index e193930ae7..c24a0f9fae 100644
--- a/fsck.c
+++ b/fsck.c
@@ -99,7 +99,7 @@ void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
}
static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
@@ -134,7 +134,7 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type)
return 1;
}
-void fsck_set_msg_type_from_ids(struct fsck_options *options,
+void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
enum fsck_msg_id msg_id,
enum fsck_msg_type msg_type)
{
@@ -150,7 +150,7 @@ void fsck_set_msg_type_from_ids(struct fsck_options *options,
options->msg_type[msg_id] = msg_type;
}
-void fsck_set_msg_type(struct fsck_options *options,
+void fsck_set_msg_type(struct fsck_objects_options *options,
const char *msg_id_str, const char *msg_type_str)
{
int msg_id = parse_msg_id(msg_id_str);
@@ -179,7 +179,7 @@ void fsck_set_msg_type(struct fsck_options *options,
free(to_free);
}
-void fsck_set_msg_types(struct fsck_options *options, const char *values)
+void fsck_set_msg_types(struct fsck_objects_options *options, const char *values)
{
char *buf = xstrdup(values), *to_free = buf;
int done = 0;
@@ -217,14 +217,14 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
free(to_free);
}
-static int object_on_skiplist(struct fsck_options *opts,
+static int object_on_skiplist(struct fsck_objects_options *opts,
const struct object_id *oid)
{
return opts && oid && oidset_contains(&opts->skiplist, oid);
}
__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
+static int report(struct fsck_objects_options *options,
const struct object_id *oid, enum object_type object_type,
enum fsck_msg_id msg_id, const char *fmt, ...)
{
@@ -257,13 +257,13 @@ static int report(struct fsck_options *options,
return result;
}
-void fsck_enable_object_names(struct fsck_options *options)
+void fsck_enable_object_names(struct fsck_objects_options *options)
{
if (!options->object_names)
options->object_names = kh_init_oid_map();
}
-const char *fsck_get_object_name(struct fsck_options *options,
+const char *fsck_get_object_name(struct fsck_objects_options *options,
const struct object_id *oid)
{
khiter_t pos;
@@ -275,7 +275,7 @@ const char *fsck_get_object_name(struct fsck_options *options,
return kh_value(options->object_names, pos);
}
-void fsck_put_object_name(struct fsck_options *options,
+void fsck_put_object_name(struct fsck_objects_options *options,
const struct object_id *oid,
const char *fmt, ...)
{
@@ -296,7 +296,7 @@ void fsck_put_object_name(struct fsck_options *options,
va_end(ap);
}
-const char *fsck_describe_object(struct fsck_options *options,
+const char *fsck_describe_object(struct fsck_objects_options *options,
const struct object_id *oid)
{
static struct strbuf bufs[] = {
@@ -316,7 +316,8 @@ const char *fsck_describe_object(struct fsck_options *options,
return buf->buf;
}
-static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
+static int fsck_walk_tree(struct tree *tree, void *data,
+ struct fsck_objects_options *options)
{
struct tree_desc desc;
struct name_entry entry;
@@ -364,7 +365,8 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op
return res;
}
-static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
+static int fsck_walk_commit(struct commit *commit, void *data,
+ struct fsck_objects_options *options)
{
int counter = 0, generation = 0, name_prefix_len = 0;
struct commit_list *parents;
@@ -433,7 +435,8 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
return res;
}
-static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
+static int fsck_walk_tag(struct tag *tag, void *data,
+ struct fsck_objects_options *options)
{
const char *name = fsck_get_object_name(options, &tag->object.oid);
@@ -444,7 +447,8 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio
return options->walk(tag->tagged, OBJ_ANY, data, options);
}
-int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
+int fsck_walk(struct object *obj, void *data,
+ struct fsck_objects_options *options)
{
if (!obj)
return -1;
@@ -580,7 +584,7 @@ static int verify_ordered(unsigned mode1, const char *name1,
static int fsck_tree(const struct object_id *tree_oid,
const char *buffer, unsigned long size,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
int retval = 0;
int has_null_sha1 = 0;
@@ -793,7 +797,7 @@ static int fsck_tree(const struct object_id *tree_oid,
*/
static int verify_headers(const void *data, unsigned long size,
const struct object_id *oid, enum object_type type,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
const char *buffer = (const char *)data;
unsigned long i;
@@ -825,7 +829,7 @@ static int verify_headers(const void *data, unsigned long size,
static int fsck_ident(const char **ident,
const struct object_id *oid, enum object_type type,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
const char *p = *ident;
char *end;
@@ -885,7 +889,7 @@ static int fsck_ident(const char **ident,
static int fsck_commit(const struct object_id *oid,
const char *buffer, unsigned long size,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
struct object_id tree_oid, parent_oid;
unsigned author_count;
@@ -946,7 +950,7 @@ static int fsck_commit(const struct object_id *oid,
}
static int fsck_tag(const struct object_id *oid, const char *buffer,
- unsigned long size, struct fsck_options *options)
+ unsigned long size, struct fsck_objects_options *options)
{
struct object_id tagged_oid;
int tagged_type;
@@ -955,7 +959,7 @@ static int fsck_tag(const struct object_id *oid, const char *buffer,
}
int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
- unsigned long size, struct fsck_options *options,
+ unsigned long size, struct fsck_objects_options *options,
struct object_id *tagged_oid,
int *tagged_type)
{
@@ -1050,7 +1054,7 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
struct fsck_gitmodules_data {
const struct object_id *oid;
- struct fsck_options *options;
+ struct fsck_objects_options *options;
int ret;
};
@@ -1100,7 +1104,8 @@ static int fsck_gitmodules_fn(const char *var, const char *value,
}
static int fsck_blob(const struct object_id *oid, const char *buf,
- unsigned long size, struct fsck_options *options)
+ unsigned long size,
+ struct fsck_objects_options *options)
{
int ret = 0;
@@ -1170,7 +1175,7 @@ static int fsck_blob(const struct object_id *oid, const char *buf,
}
int fsck_object(struct object *obj, void *data, unsigned long size,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
if (!obj)
return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
@@ -1180,7 +1185,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
int fsck_buffer(const struct object_id *oid, enum object_type type,
const void *data, unsigned long size,
- struct fsck_options *options)
+ struct fsck_objects_options *options)
{
if (type == OBJ_BLOB)
return fsck_blob(oid, data, size, options);
@@ -1197,7 +1202,7 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
+int fsck_error_function(struct fsck_objects_options *o,
const struct object_id *oid,
enum object_type object_type UNUSED,
enum fsck_msg_type msg_type,
@@ -1214,7 +1219,8 @@ int fsck_error_function(struct fsck_options *o,
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
- struct fsck_options *options, const char *blob_type)
+ struct fsck_objects_options *options,
+ const char *blob_type)
{
int ret = 0;
struct oidset_iter iter;
@@ -1253,7 +1259,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
return ret;
}
-int fsck_finish(struct fsck_options *options)
+int fsck_finish(struct fsck_objects_options *options)
{
int ret = 0;
@@ -1270,7 +1276,7 @@ int fsck_finish(struct fsck_options *options)
int git_fsck_config(const char *var, const char *value,
const struct config_context *ctx, void *cb)
{
- struct fsck_options *options = cb;
+ struct fsck_objects_options *options = cb;
const char *msg_id;
if (strcmp(var, "fsck.skiplist") == 0) {
@@ -1300,7 +1306,7 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
+int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *o,
const struct object_id *oid,
enum object_type object_type,
enum fsck_msg_type msg_type,
diff --git a/fsck.h b/fsck.h
index 6085a384f6..b64164db17 100644
--- a/fsck.h
+++ b/fsck.h
@@ -92,15 +92,15 @@ enum fsck_msg_id {
};
#undef MSG_ID
-struct fsck_options;
+struct fsck_objects_options;
struct object;
-void fsck_set_msg_type_from_ids(struct fsck_options *options,
+void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
enum fsck_msg_id msg_id,
enum fsck_msg_type msg_type);
-void fsck_set_msg_type(struct fsck_options *options,
+void fsck_set_msg_type(struct fsck_objects_options *options,
const char *msg_id, const char *msg_type);
-void fsck_set_msg_types(struct fsck_options *options, const char *values);
+void fsck_set_msg_types(struct fsck_objects_options *options, const char *values);
int is_valid_msg_type(const char *msg_id, const char *msg_type);
/*
@@ -112,26 +112,26 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
* >0 error signaled and do not abort
*/
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
- void *data, struct fsck_options *options);
+ void *data, struct fsck_objects_options *options);
/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
-typedef int (*fsck_error)(struct fsck_options *o,
+typedef int (*fsck_error)(struct fsck_objects_options *o,
const struct object_id *oid, enum object_type object_type,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
+int fsck_error_function(struct fsck_objects_options *o,
const struct object_id *oid, enum object_type object_type,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
+int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *o,
const struct object_id *oid,
enum object_type object_type,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
-struct fsck_options {
+struct fsck_objects_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
@@ -144,7 +144,7 @@ struct fsck_options {
kh_oid_map_t *object_names;
};
-#define FSCK_OPTIONS_DEFAULT { \
+#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
.skiplist = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
@@ -152,7 +152,7 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_error_function \
}
-#define FSCK_OPTIONS_STRICT { \
+#define FSCK_OBJECTS_OPTIONS_STRICT { \
.strict = 1, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
@@ -160,7 +160,7 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_error_function, \
}
-#define FSCK_OPTIONS_MISSING_GITMODULES { \
+#define FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
@@ -176,14 +176,15 @@ struct fsck_options {
* >0 return value of the first signaled error >0 (in the case of no other errors)
* 0 everything OK
*/
-int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
+int fsck_walk(struct object *obj, void *data,
+ struct fsck_objects_options *options);
/*
* Blob objects my pass a NULL data pointer, which indicates they are too large
* to fit in memory. All other types must pass a real buffer.
*/
int fsck_object(struct object *obj, void *data, unsigned long size,
- struct fsck_options *options);
+ struct fsck_objects_options *options);
/*
* Same as fsck_object(), but for when the caller doesn't have an object
@@ -191,14 +192,14 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
*/
int fsck_buffer(const struct object_id *oid, enum object_type,
const void *data, unsigned long size,
- struct fsck_options *options);
+ struct fsck_objects_options *options);
/*
* fsck a tag, and pass info about it back to the caller. This is
* exposed fsck_object() internals for git-mktag(1).
*/
int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
- unsigned long size, struct fsck_options *options,
+ unsigned long size, struct fsck_objects_options *options,
struct object_id *tagged_oid,
int *tag_type);
@@ -207,7 +208,7 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
* after completing all fsck_object() calls in order to resolve any remaining
* checks.
*/
-int fsck_finish(struct fsck_options *options);
+int fsck_finish(struct fsck_objects_options *options);
/*
* Subsystem for storing human-readable names for each object.
@@ -224,14 +225,14 @@ int fsck_finish(struct fsck_options *options);
* points to a rotating array of static buffers, and may be invalidated by a
* subsequent call.
*/
-void fsck_enable_object_names(struct fsck_options *options);
-const char *fsck_get_object_name(struct fsck_options *options,
+void fsck_enable_object_names(struct fsck_objects_options *options);
+const char *fsck_get_object_name(struct fsck_objects_options *options,
const struct object_id *oid);
__attribute__((format (printf,3,4)))
-void fsck_put_object_name(struct fsck_options *options,
+void fsck_put_object_name(struct fsck_objects_options *options,
const struct object_id *oid,
const char *fmt, ...);
-const char *fsck_describe_object(struct fsck_options *options,
+const char *fsck_describe_object(struct fsck_objects_options *options,
const struct object_id *oid);
struct key_value_info;
diff --git a/object-file.c b/object-file.c
index d3cf4b8b2e..ec44ac3d82 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2472,7 +2472,7 @@ int repo_has_object_file(struct repository *r,
* report the minimal fsck error here, and rely on the caller to
* give more context.
*/
-static int hash_format_check_report(struct fsck_options *opts UNUSED,
+static int hash_format_check_report(struct fsck_objects_options *opts UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
enum fsck_msg_type msg_type UNUSED,
@@ -2507,7 +2507,7 @@ static int index_mem(struct index_state *istate,
}
}
if (flags & HASH_FORMAT_CHECK) {
- struct fsck_options opts = FSCK_OPTIONS_DEFAULT;
+ struct fsck_objects_options opts = FSCK_OBJECTS_OPTIONS_DEFAULT;
opts.strict = 1;
opts.error_func = hash_format_check_report;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v5 01/12] fsck: rename "fsck_options" to "fsck_objects_options"
2024-06-27 15:12 ` [GSoC][PATCH v5 01/12] fsck: rename "fsck_options" to "fsck_objects_options" shejialuo
@ 2024-06-27 21:32 ` Junio C Hamano
2024-06-28 3:43 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-06-27 21:32 UTC (permalink / raw)
To: shejialuo; +Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine
shejialuo <shejialuo@gmail.com> writes:
> -static int fsck_error_func(struct fsck_options *o UNUSED,
> +static int fsck_error_func(struct fsck_objects_options *o UNUSED,
> const struct object_id *oid,
> enum object_type object_type,
> enum fsck_msg_type msg_type,
It is curious that the addition/renaming of fsck_objects_options is
presumably to allow fsck_${xyzzy}_options to be added for different
$xyzzy (the first one being "refs"), and this function is only about
fsck_objects_options. What name would the corresponding error
function, called by checkers that take fsck_${xyzzy}_options, be
given? fsck_${xyzzy}_error_func()? Shouldn't this then become
fsck_objects_error_func() or something?
Having said that.
Do we really need such a parallel system between "objects" and other
kinds of things being checked that you are introducing with this
step? What benefit are we getting from this additional complexity?
I would have expected that adding new ref-related members, which the
object consistency checkers have no interest in, to the fsck_options
structure would be sufficient for the purpose of this series. Or if
we really wanted to prepare for more complex future, use of the
"union of variants, switched with a tag" pattern to arrange the data
this way:
struct fsck_options {
enum fsck_type {
FSCK_OBJECTS,
FSCK_REFS,
...
} t;
union {
struct fsck_objects_options objects;
struct fsck_refs_options refs;
} u;
};
would still allow functions like fsck_error_func(), and
fsck_set_msg_types(), etc. to work on the common "fsck_options".
I dunno.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v5 01/12] fsck: rename "fsck_options" to "fsck_objects_options"
2024-06-27 21:32 ` Junio C Hamano
@ 2024-06-28 3:43 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-28 3:43 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine
On Thu, Jun 27, 2024 at 02:32:44PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > -static int fsck_error_func(struct fsck_options *o UNUSED,
> > +static int fsck_error_func(struct fsck_objects_options *o UNUSED,
> > const struct object_id *oid,
> > enum object_type object_type,
> > enum fsck_msg_type msg_type,
>
> It is curious that the addition/renaming of fsck_objects_options is
> presumably to allow fsck_${xyzzy}_options to be added for different
> $xyzzy (the first one being "refs"), and this function is only about
> fsck_objects_options. What name would the corresponding error
> function, called by checkers that take fsck_${xyzzy}_options, be
> given? fsck_${xyzzy}_error_func()? Shouldn't this be then become
> fsck_objects_error_func() or something?
>
Yes, it should be definitely changed here. Will improve in the next
version.
> Having said that.
>
> Do we really need such a parallel system between "objects" and other
> kinds of things being checked that you are introducing with this
> step? What benefit are we getting from this additional complexity?
>
I agree that the simplest way to handle this series is to add some
ref-related new members, so that we can reuse the existing code. However,
implementing the code with this idea felt odd to me.
For example,
struct fsck_options {
struct fsck_refs_options;
...
}
When we create a new "fsck_options", it would be misleading: the caller
may think that a single "fsck_options" handles both refs and objects
checks. So I introduced this parallel system. When checking objects, the
caller should explicitly create "fsck_objects_options"; when checking
refs, the caller should explicitly create "fsck_refs_options". This is
because, semantically, we are introducing a new check here, whereas
combining the options would imply that we check both. Although combining
is simple, it would cause a lot of trouble in the future.
> I would have expected that adding ref-related new members that
> object consistency checkers has no interest in to the fsck_options
> structure would be sufficient for the purpose of this series. Or if
> we really wanted to prepare for more complex future, use of the
> "union of variants, switched with a tag" pattern to arrange the data
> this way:
>
> struct fsck_options {
> enum fsck_type {
> FSCK_OBJECTS,
> FSCK_REFS,
> ...
> } t;
> union {
> struct fsck_objects_options objects;
> struct fsck_refs_options refs;
> } u;
> };
>
> would still allow functions like fsck_error_func(), and
> fsck_set_msg_types(), etc. to work on the common "fsck_options".
>
I agree that we could use this pattern; using a union will make the
semantics clearer.
> I dunno.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 02/12] fsck: use "fsck_configs" to set up configs
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
2024-06-27 15:12 ` [GSoC][PATCH v5 01/12] fsck: rename "fsck_options" to "fsck_objects_options" shejialuo
@ 2024-06-27 15:13 ` shejialuo
2024-06-27 21:43 ` Junio C Hamano
2024-06-27 15:13 ` [GSoC][PATCH v5 03/12] fsck: abstract common options for reusing shejialuo
` (10 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:13 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Some fields such as "msg_type" and "skiplist" in "fsck_objects_options"
are not options, these fields are related to "git-config(1)" which are
initialized using "git_fsck_config" function. Create a static variable
named "fsck_configs" in "fsck.c" which aims at handling configs. Thus we
don't need to reply on the "fsck_objects_options" to set up the fsck
error message severity.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 29 ++++++++++++++++++-----------
fsck.h | 3 ---
2 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/fsck.c b/fsck.c
index c24a0f9fae..81b93f02fc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -24,6 +24,14 @@
static ssize_t max_tree_entry_len = 4096;
+static struct {
+ enum fsck_msg_type *msg_type;
+ struct oidset oid_skiplist;
+} fsck_configs = {
+ .msg_type = NULL,
+ .oid_skiplist = OIDSET_INIT
+};
+
#define STR(x) #x
#define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
static struct {
@@ -103,7 +111,7 @@ static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
{
assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
- if (!options->msg_type) {
+ if (!fsck_configs.msg_type) {
enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
if (options->strict && msg_type == FSCK_WARN)
@@ -111,7 +119,7 @@ static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
return msg_type;
}
- return options->msg_type[msg_id];
+ return fsck_configs.msg_type[msg_id];
}
static enum fsck_msg_type parse_msg_type(const char *str)
@@ -138,16 +146,16 @@ void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
enum fsck_msg_id msg_id,
enum fsck_msg_type msg_type)
{
- if (!options->msg_type) {
+ if (!fsck_configs.msg_type) {
int i;
enum fsck_msg_type *severity;
ALLOC_ARRAY(severity, FSCK_MSG_MAX);
for (i = 0; i < FSCK_MSG_MAX; i++)
severity[i] = fsck_msg_type(i, options);
- options->msg_type = severity;
+ fsck_configs.msg_type = severity;
}
- options->msg_type[msg_id] = msg_type;
+ fsck_configs.msg_type[msg_id] = msg_type;
}
void fsck_set_msg_type(struct fsck_objects_options *options,
@@ -203,7 +211,7 @@ void fsck_set_msg_types(struct fsck_objects_options *options, const char *values
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1);
+ oidset_parse_file(&fsck_configs.oid_skiplist, buf + equal + 1);
buf += len + 1;
continue;
}
@@ -217,10 +225,9 @@ void fsck_set_msg_types(struct fsck_objects_options *options, const char *values
free(to_free);
}
-static int object_on_skiplist(struct fsck_objects_options *opts,
- const struct object_id *oid)
+static int object_on_skiplist(const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return oid && oidset_contains(&fsck_configs.oid_skiplist, oid);
}
__attribute__((format (printf, 5, 6)))
@@ -236,7 +243,7 @@ static int report(struct fsck_objects_options *options,
if (msg_type == FSCK_IGNORE)
return 0;
- if (object_on_skiplist(options, oid))
+ if (object_on_skiplist(oid))
return 0;
if (msg_type == FSCK_FATAL)
@@ -1109,7 +1116,7 @@ static int fsck_blob(const struct object_id *oid, const char *buf,
{
int ret = 0;
- if (object_on_skiplist(options, oid))
+ if (object_on_skiplist(oid))
return 0;
if (oidset_contains(&options->gitmodules_found, oid)) {
diff --git a/fsck.h b/fsck.h
index b64164db17..37deadc4bd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,8 +135,6 @@ struct fsck_objects_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
- enum fsck_msg_type *msg_type;
- struct oidset skiplist;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +143,6 @@ struct fsck_objects_options {
};
#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v5 02/12] fsck: use "fsck_configs" to set up configs
2024-06-27 15:13 ` [GSoC][PATCH v5 02/12] fsck: use "fsck_configs" to set up configs shejialuo
@ 2024-06-27 21:43 ` Junio C Hamano
2024-06-28 4:22 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-06-27 21:43 UTC (permalink / raw)
To: shejialuo; +Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine
shejialuo <shejialuo@gmail.com> writes:
> Some fields such as "msg_type" and "skiplist" in "fsck_objects_options"
> are not options, these fields are related to "git-config(1)" which are
> initialized using "git_fsck_config" function. Create a static variable
> named "fsck_configs" in "fsck.c" which aims at handling configs. Thus we
> don't need to reply on the "fsck_objects_options" to set up the fsck
> error message severity.
reply???
As configuration often is used to prepopulate options, I need a lot
stronger justification to split these into a different structure than
"'config' is a noun that is different from a noun 'option'".
If we intend to have many "option" instances but what these two
members store will be the same across these "option" instances, for
example, that would be a lot better reason why we may want to
separate these two members out of it, but I have a suspicion that if
we were to use the "union with tags" approach, these two would
become members of the common part shared between "objects" and
"refs", i.e. the overall data structure might look more like this:
struct fsck_options {
enum fsck_msg_type *msg_type;
struct oidset oid_skiplist;
enum fsck_what_check { O, R } tag;
union {
struct fsck_object_options o;
struct fsck_ref_options r;
} u;
};
by moving these two members out of fsck_object_options and moving
them to the shared part.
I dunno. It is unclear what the real reason is for these two things
to be extracted out and made to appear totally independent from the
"options" thing to begin with, and I am not sure if I agree with the
reason when it is known.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v5 02/12] fsck: use "fsck_configs" to set up configs
2024-06-27 21:43 ` Junio C Hamano
@ 2024-06-28 4:22 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-28 4:22 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine
On Thu, Jun 27, 2024 at 02:43:47PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > Some fields such as "msg_type" and "skiplist" in "fsck_objects_options"
> > are not options, these fields are related to "git-config(1)" which are
> > initialized using "git_fsck_config" function. Create a static variable
> > named "fsck_configs" in "fsck.c" which aims at handling configs. Thus we
> > don't need to reply on the "fsck_objects_options" to set up the fsck
> > error message severity.
>
> reply???
>
Sorry, I often mistype "rely" as "reply".
> As configuration often is used to prepopulate options, I need a lot
> stronger justification to split these into a different structure than
> "'config' is a noun that is different from a noun 'option'".
>
> If we intend to have many "option" instances but what these two
> members store will be the same across these "option" instances, for
> example, that would be a lot better reason why we may want to
> separate these two members out of it, but I have a suspicion that if
> we were to use the "union with tags" approach, these two would
> become members of the common part shared between "objects" and
> "refs", i.e. the overall data structure might look more like this:
>
Actually, I feel really weird about this part. Let me elaborate on this.
"fsck.c::git_fsck_config()" is used to set up the configs. It will
eventually call the "fsck.c::fsck_set_msg_type_from_ids" like the
following:
void fsck_set_msg_type_from_ids(struct fsck_options *options,
enum fsck_msg_id msg_id,
enum fsck_msg_type msg_type)
{
if (!options->msg_type) {
int i;
enum fsck_msg_type *severity;
ALLOC_ARRAY(severity, FSCK_MSG_MAX);
for (i = 0; i < FSCK_MSG_MAX; i++)
severity[i] = fsck_msg_type(i, options);
options->msg_type = severity;
}
options->msg_type[msg_id] = msg_type;
}
In the current codebase, the caller will simply create a "fsck_options"
and set up the fsck error message severity. However, look at
"builtin/fsck.c": it creates the following two "fsck_options":
static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;
However, the code only uses "fsck_obj_options" to set up the configs,
which feels strange to me. So I just wanted to separate them. Maybe I am
a little wrong here.
> struct fsck_options {
> enum fsck_msg_type *msg_type;
> struct oidset oid_skiplist;
> enum fsck_what_check { O, R } tag;
> union {
> struct fsck_object_options o;
> struct fsck_ref_options r;
> } u;
> };
>
> by moving these two members out of fsck_object_options and moving
> them to the shared part.
>
> I dunno. It is unclear what the real reason is for these two things
> to be extracted out and made to appear totally independent from the
> "options" thing to begin with, and I am not sure if I agree with the
> reason when it is known.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 03/12] fsck: abstract common options for reusing
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
2024-06-27 15:12 ` [GSoC][PATCH v5 01/12] fsck: rename "fsck_options" to "fsck_objects_options" shejialuo
2024-06-27 15:13 ` [GSoC][PATCH v5 02/12] fsck: use "fsck_configs" to set up configs shejialuo
@ 2024-06-27 15:13 ` shejialuo
2024-06-27 15:13 ` [GSoC][PATCH v5 04/12] fsck: add "fsck_refs_options" struct shejialuo
` (9 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:13 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Create "fsck_options" struct to contain the general fields. Change
"fsck_objects_options" to incorporate the general "fsck_options"
and object-specific options.
The fsck message will use "strict" option to set the corresponding
"msg_type". Rename the parameter name from "struct fsck_objects_options
*" to "struct fsck_options *".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 6 +++---
builtin/index-pack.c | 4 ++--
builtin/mktag.c | 6 +++---
builtin/unpack-objects.c | 2 +-
fsck.c | 25 ++++++++++++++++---------
fsck.h | 32 ++++++++++++++++++++++----------
object-file.c | 4 ++--
7 files changed, 49 insertions(+), 30 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index ec3220880d..c383125027 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -939,9 +939,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.fsck_options.error_func = fsck_error_func;
if (check_strict)
- fsck_obj_options.strict = 1;
+ fsck_obj_options.fsck_options.strict = 1;
if (show_progress == -1)
show_progress = isatty(2);
@@ -956,7 +956,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
if (name_objects)
fsck_enable_object_names(&fsck_walk_options);
- git_config(git_fsck_config, &fsck_obj_options);
+ git_config(git_fsck_config, &fsck_obj_options.fsck_options);
prepare_repo_settings(the_repository);
if (connectivity_only) {
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 08ebeedfd3..360106b0c8 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1770,13 +1770,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
} else if (skip_to_optional_arg(arg, "--strict", &arg)) {
strict = 1;
do_fsck_object = 1;
- fsck_set_msg_types(&fsck_objects_options, arg);
+ fsck_set_msg_types(&fsck_objects_options.fsck_options, arg);
} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
strict = 1;
check_self_contained_and_connected = 1;
} else if (skip_to_optional_arg(arg, "--fsck-objects", &arg)) {
do_fsck_object = 1;
- fsck_set_msg_types(&fsck_objects_options, arg);
+ fsck_set_msg_types(&fsck_objects_options.fsck_options, arg);
} else if (!strcmp(arg, "--verify")) {
verify = 1;
} else if (!strcmp(arg, "--verify-stat")) {
diff --git a/builtin/mktag.c b/builtin/mktag.c
index be2abc71d8..76860f4c7c 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -91,12 +91,12 @@ int cmd_mktag(int argc, const char **argv, const char *prefix)
if (strbuf_read(&buf, 0, 0) < 0)
die_errno(_("could not read from stdin"));
- fsck_objects_options.error_func = mktag_fsck_error_func;
- fsck_set_msg_type_from_ids(&fsck_objects_options,
+ fsck_objects_options.fsck_options.error_func = mktag_fsck_error_func;
+ fsck_set_msg_type_from_ids(&fsck_objects_options.fsck_options,
FSCK_MSG_EXTRA_HEADER_ENTRY,
FSCK_WARN);
/* config might set fsck.extraHeaderEntry=* again */
- git_config(git_fsck_config, &fsck_objects_options);
+ git_config(git_fsck_config, &fsck_objects_options.fsck_options);
if (fsck_tag_standalone(NULL, buf.buf, buf.len, &fsck_objects_options,
&tagged_oid, &tagged_type))
die(_("tag on stdin did not pass our strict fsck check"));
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index c59e330db9..d8d0b14018 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -635,7 +635,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
}
if (skip_prefix(arg, "--strict=", &arg)) {
strict = 1;
- fsck_set_msg_types(&fsck_objects_options, arg);
+ fsck_set_msg_types(&fsck_objects_options.fsck_options, arg);
continue;
}
if (starts_with(arg, "--pack_header=")) {
diff --git a/fsck.c b/fsck.c
index 81b93f02fc..7ac6e4587c 100644
--- a/fsck.c
+++ b/fsck.c
@@ -107,7 +107,7 @@ void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
}
static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
- struct fsck_objects_options *options)
+ struct fsck_options *options)
{
assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
@@ -142,7 +142,7 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type)
return 1;
}
-void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
+void fsck_set_msg_type_from_ids(struct fsck_options *options,
enum fsck_msg_id msg_id,
enum fsck_msg_type msg_type)
{
@@ -158,7 +158,7 @@ void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
fsck_configs.msg_type[msg_id] = msg_type;
}
-void fsck_set_msg_type(struct fsck_objects_options *options,
+void fsck_set_msg_type(struct fsck_options *options,
const char *msg_id_str, const char *msg_type_str)
{
int msg_id = parse_msg_id(msg_id_str);
@@ -187,7 +187,7 @@ void fsck_set_msg_type(struct fsck_objects_options *options,
free(to_free);
}
-void fsck_set_msg_types(struct fsck_objects_options *options, const char *values)
+void fsck_set_msg_types(struct fsck_options *options, const char *values)
{
char *buf = xstrdup(values), *to_free = buf;
int done = 0;
@@ -237,9 +237,16 @@ static int report(struct fsck_objects_options *options,
{
va_list ap;
struct strbuf sb = STRBUF_INIT;
- enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
+ struct fsck_options *fsck_options;
+ enum fsck_msg_type msg_type;
int result;
+ if (options)
+ fsck_options = &options->fsck_options;
+ else
+ BUG("fsck_options is not set");
+
+ msg_type = fsck_msg_type(msg_id, fsck_options);
if (msg_type == FSCK_IGNORE)
return 0;
@@ -256,8 +263,8 @@ static int report(struct fsck_objects_options *options,
va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
- msg_type, msg_id, sb.buf);
+ result = fsck_options->error_func(options, oid, object_type,
+ msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -711,7 +718,7 @@ static int fsck_tree(const struct object_id *tree_oid,
* bits..
*/
case S_IFREG | 0664:
- if (!options->strict)
+ if (!options->fsck_options.strict)
break;
/* fallthrough */
default:
@@ -1283,7 +1290,7 @@ int fsck_finish(struct fsck_objects_options *options)
int git_fsck_config(const char *var, const char *value,
const struct config_context *ctx, void *cb)
{
- struct fsck_objects_options *options = cb;
+ struct fsck_options *options = cb;
const char *msg_id;
if (strcmp(var, "fsck.skiplist") == 0) {
diff --git a/fsck.h b/fsck.h
index 37deadc4bd..e531b44a66 100644
--- a/fsck.h
+++ b/fsck.h
@@ -92,15 +92,16 @@ enum fsck_msg_id {
};
#undef MSG_ID
+struct fsck_options;
struct fsck_objects_options;
struct object;
-void fsck_set_msg_type_from_ids(struct fsck_objects_options *options,
+void fsck_set_msg_type_from_ids(struct fsck_options *options,
enum fsck_msg_id msg_id,
enum fsck_msg_type msg_type);
-void fsck_set_msg_type(struct fsck_objects_options *options,
+void fsck_set_msg_type(struct fsck_options *options,
const char *msg_id, const char *msg_type);
-void fsck_set_msg_types(struct fsck_objects_options *options, const char *values);
+void fsck_set_msg_types(struct fsck_options *options, const char *values);
int is_valid_msg_type(const char *msg_id, const char *msg_type);
/*
@@ -131,10 +132,15 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *o,
enum fsck_msg_id msg_id,
const char *message);
+struct fsck_options {
+ fsck_error error_func;
+ unsigned verbose:1,
+ strict:1;
+};
+
struct fsck_objects_options {
+ struct fsck_options fsck_options;
fsck_walk_func walk;
- fsck_error error_func;
- unsigned strict:1;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -143,27 +149,33 @@ struct fsck_objects_options {
};
#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
+ .fsck_options = { \
+ .error_func = fsck_error_function, \
+ }, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
}
#define FSCK_OBJECTS_OPTIONS_STRICT { \
- .strict = 1, \
+ .fsck_options = { \
+ .error_func = fsck_error_function, \
+ .strict = 1, \
+ }, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
}
#define FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES { \
- .strict = 1, \
+ .fsck_options = { \
+ .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .strict = 1, \
+ }, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
diff --git a/object-file.c b/object-file.c
index ec44ac3d82..9eda05ee01 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2509,8 +2509,8 @@ static int index_mem(struct index_state *istate,
if (flags & HASH_FORMAT_CHECK) {
struct fsck_objects_options opts = FSCK_OBJECTS_OPTIONS_DEFAULT;
- opts.strict = 1;
- opts.error_func = hash_format_check_report;
+ opts.fsck_options.strict = 1;
+ opts.fsck_options.error_func = hash_format_check_report;
if (fsck_buffer(null_oid(), type, buf, size, &opts))
die(_("refusing to create malformed object"));
fsck_finish(&opts);
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 04/12] fsck: add "fsck_refs_options" struct
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-06-27 15:13 ` [GSoC][PATCH v5 03/12] fsck: abstract common options for reusing shejialuo
@ 2024-06-27 15:13 ` shejialuo
2024-06-27 15:14 ` [GSoC][PATCH v5 05/12] fsck: add a unified interface for reporting fsck messages shejialuo
` (8 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:13 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Add "fsck_refs_options" to support ref consistency checks.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/fsck.h b/fsck.h
index e531b44a66..a17fee30b4 100644
--- a/fsck.h
+++ b/fsck.h
@@ -138,6 +138,10 @@ struct fsck_options {
strict:1;
};
+struct fsck_refs_options {
+ struct fsck_options fsck_options;
+};
+
struct fsck_objects_options {
struct fsck_options fsck_options;
fsck_walk_func walk;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 05/12] fsck: add a unified interface for reporting fsck messages
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-06-27 15:13 ` [GSoC][PATCH v5 04/12] fsck: add "fsck_refs_options" struct shejialuo
@ 2024-06-27 15:14 ` shejialuo
2024-06-27 15:14 ` [GSoC][PATCH v5 06/12] fsck: add "fsck_refs_options" initialization macros shejialuo
` (7 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:14 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
The static function "report" provided by "fsck.c" aims at reporting the
problems related to object database which cannot be reused for refs.
In order to provide a unified interface which can report either objects
or refs, create a new function "vfsck_report" by adding two parameters
"refs_options" and "checked_ref_name" following the "report" prototype.
However, instead of using "...", provide "va_list" to allow more
flexibility.
The "vfsck_report" function will use "error_func" registered in the
"fsck_options" struct to report customized messages. Change
"error_func" prototype to align with the "vfsck_report".
Change "report" function to make it use "vfsck_report" to report
objects-related messages. Add a new function called "fsck_refs_report"
to use "vfsck_report" to report refs-related messages. Also, create a
general function "fsck_report" for future use.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 4 ++-
builtin/mktag.c | 4 ++-
fsck.c | 88 ++++++++++++++++++++++++++++++++++++++++---------
fsck.h | 39 +++++++++++++++++++---
object-file.c | 14 ++++----
5 files changed, 122 insertions(+), 27 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index c383125027..2a3b536c1b 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,9 +89,11 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_objects_options *o UNUSED,
+static int fsck_error_func(struct fsck_objects_options *objects_options UNUSED,
+ struct fsck_refs_options *refs_options UNUSED,
const struct object_id *oid,
enum object_type object_type,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 76860f4c7c..0779a778e9 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -17,9 +17,11 @@ static int option_strict = 1;
static struct fsck_objects_options fsck_objects_options = FSCK_OBJECTS_OPTIONS_STRICT;
-static int mktag_fsck_error_func(struct fsck_objects_options *o UNUSED,
+static int mktag_fsck_error_func(struct fsck_objects_options *objects_options UNUSED,
+ struct fsck_refs_options *refs_options UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 7ac6e4587c..5184d17736 100644
--- a/fsck.c
+++ b/fsck.c
@@ -230,19 +230,23 @@ static int object_on_skiplist(const struct object_id *oid)
return oid && oidset_contains(&fsck_configs.oid_skiplist, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_objects_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+static int vfsck_report(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
struct fsck_options *fsck_options;
enum fsck_msg_type msg_type;
int result;
- if (options)
- fsck_options = &options->fsck_options;
+ if (objects_options)
+ fsck_options = &objects_options->fsck_options;
+ else if (refs_options)
+ fsck_options = &refs_options->fsck_options;
else
BUG("fsck_options is not set");
@@ -261,9 +265,10 @@ static int report(struct fsck_objects_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = fsck_options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = fsck_options->error_func(objects_options, NULL,
+ oid, object_type, checked_ref_name,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -271,6 +276,51 @@ static int report(struct fsck_objects_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_objects_options *objects_options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(objects_options, NULL, oid, object_type, "",
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
+int fsck_report(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(objects_options, refs_options, oid, object_type,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+
+}
+
+int fsck_refs_report(struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_report(NULL, refs_options, oid, OBJ_NONE,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_objects_options *options)
{
if (!options->object_names)
@@ -1216,18 +1266,22 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_objects_options *o,
+int fsck_error_function(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options UNUSED,
const struct object_id *oid,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
if (msg_type == FSCK_WARN) {
- warning("object %s: %s", fsck_describe_object(o, oid), message);
+ warning("object %s: %s",
+ fsck_describe_object(objects_options, oid), message);
return 0;
}
- error("object %s: %s", fsck_describe_object(o, oid), message);
+ error("object %s: %s",
+ fsck_describe_object(objects_options, oid), message);
return 1;
}
@@ -1320,9 +1374,11 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *o,
+int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
const struct object_id *oid,
enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
@@ -1331,5 +1387,7 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *o,
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_error_function(objects_options, refs_options,
+ oid, object_type, checked_ref_name,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index a17fee30b4..70d5e78ae6 100644
--- a/fsck.h
+++ b/fsck.h
@@ -93,6 +93,7 @@ enum fsck_msg_id {
#undef MSG_ID
struct fsck_options;
+struct fsck_refs_options;
struct fsck_objects_options;
struct object;
@@ -115,19 +116,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_objects_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
-typedef int (*fsck_error)(struct fsck_objects_options *o,
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
+typedef int (*fsck_error)(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_objects_options *o,
+int fsck_error_function(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *o,
+int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
const struct object_id *oid,
enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
@@ -223,6 +232,28 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_objects_options *options);
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct. For refs, the caller
+ * should pass NULL for "objects_options". For objects, the caller should pass
+ * NULL for "refs_options".
+ */
+__attribute__((format (printf, 7, 8)))
+int fsck_report(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...);
+
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 9eda05ee01..5cb9117fc4 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2472,12 +2472,14 @@ int repo_has_object_file(struct repository *r,
* report the minimal fsck error here, and rely on the caller to
* give more context.
*/
-static int hash_format_check_report(struct fsck_objects_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int hash_format_check_report(struct fsck_objects_options *objects_options UNUSED,
+ struct fsck_refs_options *refs_options UNUSED,
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *chekced_ref_name UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 06/12] fsck: add "fsck_refs_options" initialization macros
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-06-27 15:14 ` [GSoC][PATCH v5 05/12] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-06-27 15:14 ` shejialuo
2024-06-27 15:15 ` [GSoC][PATCH v5 07/12] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:14 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT" macros to
create the "fsck_refs_options" easily. Add refs-specific "error_func"
callback "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
passes an oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the user.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 23 +++++++++++++++++++++++
fsck.h | 19 +++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/fsck.c b/fsck.c
index 5184d17736..4869566d19 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1285,6 +1285,29 @@ int fsck_error_function(struct fsck_objects_options *objects_options,
return 1;
}
+int fsck_refs_error_function(struct fsck_objects_options *objects_options UNUSED,
+ struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ static struct strbuf sb = STRBUF_INIT;
+
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN) {
+ warning("%s: %s", sb.buf, message);
+ return 0;
+ }
+ error("%s: %s", sb.buf, message);
+ return 1;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_objects_options *options,
diff --git a/fsck.h b/fsck.h
index 70d5e78ae6..e903845690 100644
--- a/fsck.h
+++ b/fsck.h
@@ -140,6 +140,14 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *objects_
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_objects_options *objects_options,
+ struct fsck_refs_options *refs_options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_error error_func;
@@ -150,6 +158,17 @@ struct fsck_options {
struct fsck_refs_options {
struct fsck_options fsck_options;
};
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .fsck_options = { \
+ .error_func = fsck_refs_error_function, \
+ }, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .fsck_options = { \
+ .error_func = fsck_refs_error_function, \
+ .strict = 1, \
+ }, \
+}
struct fsck_objects_options {
struct fsck_options fsck_options;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 07/12] refs: set up ref consistency check infrastructure
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-06-27 15:14 ` [GSoC][PATCH v5 06/12] fsck: add "fsck_refs_options" initialization macros shejialuo
@ 2024-06-27 15:15 ` shejialuo
2024-06-27 15:15 ` [GSoC][PATCH v5 08/12] builtin/refs: add verify subcommand shejialuo
` (5 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:15 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
The interfaces defined in `ref_storage_be` are carefully grouped by their
semantics. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To keep consistent with git-fsck(1), add a new interface named "fsck_fn"
to the end of "ref_storage_be". Its semantics do not fit into any of the
five categories above, so explicitly add a blank line to set it apart
from the others.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index dd0d9c360f..646fdfaf22 100644
--- a/refs.c
+++ b/refs.c
@@ -316,6 +316,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_refs_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index df8cbf6124..a8f4deb108 100644
--- a/refs.h
+++ b/refs.h
@@ -3,6 +3,7 @@
#include "commit.h"
+struct fsck_refs_options;
struct object_id;
struct ref_store;
struct repository;
@@ -547,6 +548,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_refs_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..6306c25fd3 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_refs_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 44c5c3b201..7257fffb77 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3406,6 +3406,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_refs_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3432,5 +3443,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index c4c1e36aa2..db152053f8 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1733,6 +1733,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_refs_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1760,4 +1766,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..ecc082baf8 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_refs_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 7d872a32ac..2a358b40ca 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2292,6 +2292,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_refs_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2319,4 +2325,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 08/12] builtin/refs: add verify subcommand
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-06-27 15:15 ` [GSoC][PATCH v5 07/12] refs: set up ref consistency check infrastructure shejialuo
@ 2024-06-27 15:15 ` shejialuo
2024-06-27 15:16 ` [GSoC][PATCH v5 09/12] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:15 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++++
builtin/refs.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter checking: every WARN severity for the `Fsck Messages`
+ is treated as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..511b0d0f8b 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,46 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_refs_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
+ fsck_refs_options.fsck_options.verbose = 1;
+ if (strict)
+ fsck_refs_options.fsck_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options.fsck_options);
+ prepare_repo_settings(the_repository);
+
+ return refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 09/12] builtin/fsck: add `git-refs verify` child process
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-06-27 15:15 ` [GSoC][PATCH v5 08/12] builtin/refs: add verify subcommand shejialuo
@ 2024-06-27 15:16 ` shejialuo
2024-06-27 15:16 ` [GSoC][PATCH v5 10/12] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:16 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 2a3b536c1b..a5654da62b 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -899,6 +899,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1068,6 +1083,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 10/12] files-backend: add unified interface for refs scanning
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-06-27 15:16 ` [GSoC][PATCH v5 09/12] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-06-27 15:16 ` shejialuo
2024-06-27 15:17 ` [GSoC][PATCH v5 11/12] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:16 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories in the files
backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 7257fffb77..aefc947fc3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -4,6 +4,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3406,6 +3407,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_refs_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_refs_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->fsck_options.verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_refs_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->fsck_options.verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_refs_options *o)
{
@@ -3413,7 +3486,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 11/12] fsck: add ref name check for files backend
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (9 preceding siblings ...)
2024-06-27 15:16 ` [GSoC][PATCH v5 10/12] files-backend: add unified interface for refs scanning shejialuo
@ 2024-06-27 15:17 ` shejialuo
2024-06-27 15:18 ` [GSoC][PATCH v5 12/12] fsck: add ref content " shejialuo
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:17 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
git-fsck(1) only implicitly checks references; it does not fully check
refs with badly formatted names, such as a standalone "@" or a name
ending with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new test to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index e903845690..aad2211bff 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aefc947fc3..e840a768b4 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3417,6 +3417,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_refs_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_refs_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_refs_options *o,
const char *refs_check_dir,
@@ -3468,6 +3487,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v5 12/12] fsck: add ref content check for files backend
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (10 preceding siblings ...)
2024-06-27 15:17 ` [GSoC][PATCH v5 11/12] fsck: add ref name check for files backend shejialuo
@ 2024-06-27 15:18 ` shejialuo
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-06-27 15:18 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert the
relative path to an absolute path. Then use "skip_prefix" to make it
align with the symref "referent".
Thus, symrefs and symbolic links can share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path has an unexpected file type in the filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has bad content.
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref has trailing content.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index aad2211bff..5c1b113674 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 646fdfaf22..55d581ab12 100644
--- a/refs.c
+++ b/refs.c
@@ -1758,7 +1758,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index e840a768b4..0e6b6d3f6a 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,4 +1,5 @@
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -551,7 +552,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -587,7 +588,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -609,6 +610,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3436,6 +3441,141 @@ static int files_fsck_refs_name(struct fsck_refs_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For symbolic ref, "pointee_name"
+ * would be the content after "ref:". For symbolic link, "pointee_name" would
+ * be the relative path against "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_refs_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_refs_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs could have a trailing garbage. Should
+ * be reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_refs_options *o,
const char *refs_check_dir,
@@ -3488,6 +3628,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index ecc082baf8..915d0d1f20 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 00/11] ref consistency check infra setup
2024-06-27 15:08 ` [GSoC][PATCH v5 00/12] ref consistency check infra setup shejialuo
` (11 preceding siblings ...)
2024-06-27 15:18 ` [GSoC][PATCH v5 12/12] fsck: add ref content " shejialuo
@ 2024-07-01 15:13 ` shejialuo
2024-07-01 15:18 ` [PATCH v6 01/11] fsck: add "fsck_objects_options" to hold objects-related options shejialuo
` (12 more replies)
12 siblings, 13 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:13 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Hi All:
This version follows Junio's advice. Instead of creating the
following data structure:
struct fsck_options {
enum fsck_type {
FSCK_OBJECTS,
FSCK_REFS,
...
} t;
union {
struct fsck_objects_options objects;
struct fsck_refs_options refs;
} u;
};
I simply use the combination idea where "fsck_options" will incorporate
"fsck_objects_options" and "fsck_refs_options". Karthik has told me that
I should balance the job I should do and the extensibility for the future.
So I use the clearest way to do this. Also Junio has said:
> I would have expected that adding ref-related new members that
> object consistency checkers has no interest in to the fsck_options
> structure would be sufficient for the purpose of this series.
So this patch makes the design simple here.
Thanks to every reviewer.
Jialuo.
shejialuo (11):
fsck: add "fsck_objects_options" to hold objects-related options
fsck: rename "skiplist" to "oid_skiplist"
fsck: add "fsck_refs_options" into "fsck_options"
fsck: add a unified interface for reporting fsck messages
fsck: add "fsck_refs_options" initialization macros
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 44 ++++--
builtin/index-pack.c | 4 +-
builtin/mktag.c | 3 +-
builtin/refs.c | 44 ++++++
builtin/unpack-objects.c | 4 +-
fetch-pack.c | 10 +-
fsck.c | 158 +++++++++++++++------
fsck.h | 115 ++++++++++-----
object-file.c | 13 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 255 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
19 files changed, 824 insertions(+), 115 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v5:
1: d83b5797cb < -: ---------- fsck: rename "fsck_options" to "fsck_objects_options"
2: 88000b092a < -: ---------- fsck: use "fsck_configs" to set up configs
3: 65093bae64 < -: ---------- fsck: abstract common options for reusing
4: 403750da30 < -: ---------- fsck: add "fsck_refs_options" struct
5: 8dea5654d5 < -: ---------- fsck: add a unified interface for reporting fsck messages
-: ---------- > 1: d32ae41a4d fsck: add "fsck_objects_options" to hold objects-related options
-: ---------- > 2: 589a7a6fac fsck: rename "skiplist" to "oid_skiplist"
-: ---------- > 3: bab97d7c82 fsck: add "fsck_refs_options" into "fsck_options"
-: ---------- > 4: 276da52a6b fsck: add a unified interface for reporting fsck messages
6: 8fb62b54c6 ! 5: e93940c50c fsck: add "fsck_refs_options" initialization macros
@@ Commit message
fsck: add "fsck_refs_options" initialization macros
Add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT" macros to
- create the "fsck_refs_options" easily. Add refs-specific "error_func"
- callback "fsck_refs_error_function".
+ create the refs options easily. Add refs-specific "error_func" callback
+ "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
passed the oid, it will use "oid_to_hex" to get the corresponding hex
@@ Commit message
Signed-off-by: shejialuo <shejialuo@gmail.com>
## fsck.c ##
-@@ fsck.c: int fsck_error_function(struct fsck_objects_options *objects_options,
+@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
-+int fsck_refs_error_function(struct fsck_objects_options *objects_options UNUSED,
-+ struct fsck_refs_options *refs_options,
++int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
@@ fsck.c: int fsck_error_function(struct fsck_objects_options *objects_options,
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
- struct fsck_objects_options *options,
+ struct fsck_options *options, const char *blob_type)
## fsck.h ##
-@@ fsck.h: int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *objects_
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
-+int fsck_refs_error_function(struct fsck_objects_options *objects_options,
-+ struct fsck_refs_options *refs_options,
+@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
++int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
@@ fsck.h: int fsck_error_cb_print_missing_gitmodules(struct fsck_objects_options *
+ enum fsck_msg_id msg_id,
+ const char *message);
- struct fsck_options {
- fsck_error error_func;
-@@ fsck.h: struct fsck_options {
struct fsck_refs_options {
- struct fsck_options fsck_options;
- };
+ unsigned verbose:1;
+@@ fsck.h: struct fsck_options {
+ .gitattributes_done = OIDSET_INIT, \
+ } \
+ }
+#define FSCK_REFS_OPTIONS_DEFAULT { \
-+ .fsck_options = { \
-+ .error_func = fsck_refs_error_function, \
-+ }, \
++ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
-+ .fsck_options = { \
-+ .error_func = fsck_refs_error_function, \
-+ .strict = 1, \
-+ }, \
++ .strict = 1, \
++ .error_func = fsck_refs_error_function, \
+}
- struct fsck_objects_options {
- struct fsck_options fsck_options;
+ /* descend in all linked child objects
+ * the return value is:
7: 2ecd1f5407 ! 6: 85aa953f6d refs: set up ref consistency check infrastructure
@@ refs.c: int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
-+int refs_fsck(struct ref_store *refs, struct fsck_refs_options *o)
++int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
@@ refs.h
#include "commit.h"
-+struct fsck_refs_options;
++struct fsck_options;
struct object_id;
struct ref_store;
struct repository;
@@ refs.h: int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
-+int refs_fsck(struct ref_store *refs, struct fsck_refs_options *o);
++int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
@@ refs/debug.c: static int debug_reflog_expire(struct ref_store *ref_store, const
}
+static int debug_fsck(struct ref_store *ref_store,
-+ struct fsck_refs_options *o)
++ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
}
+static int files_fsck(struct ref_store *ref_store,
-+ struct fsck_refs_options *o)
++ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
@@ refs/packed-backend.c: static struct ref_iterator *packed_reflog_iterator_begin(
}
+static int packed_fsck(struct ref_store *ref_store,
-+ struct fsck_refs_options *o)
++ struct fsck_options *o)
+{
+ return 0;
+}
@@ refs/refs-internal.h: typedef int read_raw_ref_fn(struct ref_store *ref_store, c
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
-+ struct fsck_refs_options *o);
++ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
@@ refs/reftable-backend.c: static int reftable_be_reflog_expire(struct ref_store *
}
+static int reftable_be_fsck(struct ref_store *ref_store,
-+ struct fsck_refs_options *o)
++ struct fsck_options *o)
+{
+ return 0;
+}
8: 534a3d00af ! 7: ce7adc7372 builtin/refs: add verify subcommand
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
-+ struct fsck_refs_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
++ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
++ int ret = 0;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
-+ fsck_refs_options.fsck_options.verbose = 1;
++ fsck_refs_options.refs_options.verbose = 1;
+ if (strict)
-+ fsck_refs_options.fsck_options.strict = 1;
++ fsck_refs_options.strict = 1;
+
-+ git_config(git_fsck_config, &fsck_refs_options.fsck_options);
++ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
-+ return refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
++ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
++
++ /*
++ * Explicitly free the allocated array and "oid_skiplist"
++ */
++ free(fsck_refs_options.msg_type);
++ oidset_clear(&fsck_refs_options.oid_skiplist);
++ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
9: 054c8cea89 = 8: 035eafe10b builtin/fsck: add `git-refs verify` child process
10: 7f87c6a26e ! 9: 9398bf3f0d files-backend: add unified interface for refs scanning
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
-+typedef int (*files_fsck_refs_fn)(struct fsck_refs_options *o,
++typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
-+ struct fsck_refs_options *o,
++ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
-+ if (o->fsck_options.verbose)
++ if (o->refs_options.verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
-+ struct fsck_refs_options *o)
++ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
-+ if (o->fsck_options.verbose)
++ if (o->refs_options.verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+}
+
static int files_fsck(struct ref_store *ref_store,
- struct fsck_refs_options *o)
+ struct fsck_options *o)
{
@@ refs/files-backend.c: static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
11: 7d78014e5f ! 10: 046773e35c fsck: add ref name check for files backend
@@ fsck.h: enum fsck_msg_type {
FUNC(BAD_TREE_SHA1, ERROR) \
## refs/files-backend.c ##
-@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_refs_options *o,
+@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
-+static int files_fsck_refs_name(struct fsck_refs_options *o,
++static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_refs_options
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
- struct fsck_refs_options *o,
+ struct fsck_options *o,
const char *refs_check_dir,
@@ refs/files-backend.c: static int files_fsck_refs(struct ref_store *ref_store,
{
12: 16209a73ed ! 11: c9b9599589 fsck: add ref content check for files backend
@@ refs/files-backend.c: int parse_loose_ref_contents(const char *buf, struct objec
return 0;
}
-@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_refs_options *o,
+@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_refs_options *
+ * would be the content after "ref:". For symbolic link, "pointee_name" would
+ * be the relative path against "gitdir".
+ */
-+static int files_fsck_symref_target(struct fsck_refs_options *o,
++static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_refs_options *
+ return ret;
+}
+
-+static int files_fsck_refs_content(struct fsck_refs_options *o,
++static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_refs_options *
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
- struct fsck_refs_options *o,
+ struct fsck_options *o,
const char *refs_check_dir,
@@ refs/files-backend.c: static int files_fsck_refs(struct ref_store *ref_store,
int ret;
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [PATCH v6 01/11] fsck: add "fsck_objects_options" to hold objects-related options
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
@ 2024-07-01 15:18 ` shejialuo
2024-07-01 15:19 ` [GSoC][PATCH v6 02/11] fsck: rename "skiplist" to "oid_skiplist" shejialuo
` (11 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:18 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
git-fsck(1) focuses on checking the consistency of the object database. It
relies on "fsck_options" to interact with fsck error levels. However,
"fsck_options" is designed for checking the object database and contains
many fields that are only related to the object database.
In order to add ref operations, move the object-related options out of
"fsck_options" into "fsck_objects_options" and let "fsck_options"
incorporate "fsck_objects_options". Change the macros and the affected
code to adapt to the above design.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 12 +++++-----
builtin/index-pack.c | 4 ++--
builtin/mktag.c | 2 +-
builtin/unpack-objects.c | 4 ++--
fetch-pack.c | 10 ++++----
fsck.c | 51 +++++++++++++++++++++------------------
fsck.h | 52 ++++++++++++++++++++++++----------------
object-file.c | 2 +-
8 files changed, 77 insertions(+), 60 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..13b64f723f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -42,8 +42,8 @@ static int check_full = 1;
static int connectivity_only;
static int check_strict;
static int keep_cache_objects;
-static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
-static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;
+static struct fsck_options fsck_walk_options = FSCK_OBJECTS_OPTIONS_DEFAULT;
+static struct fsck_options fsck_obj_options = FSCK_OBJECTS_OPTIONS_DEFAULT;
static int errors_found;
static int write_lost_and_found;
static int verbose;
@@ -214,7 +214,7 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
static void mark_unreachable_referents(const struct object_id *oid)
{
- struct fsck_options options = FSCK_OPTIONS_DEFAULT;
+ struct fsck_options options = FSCK_OBJECTS_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
@@ -233,7 +233,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
object_as_type(obj, type, 0);
}
- options.walk = mark_used;
+ options.objects_options.walk = mark_used;
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
@@ -936,8 +936,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
argc = parse_options(argc, argv, prefix, fsck_opts, fsck_usage, 0);
- fsck_walk_options.walk = mark_object;
- fsck_obj_options.walk = mark_used;
+ fsck_walk_options.objects_options.walk = mark_object;
+ fsck_obj_options.objects_options.walk = mark_used;
fsck_obj_options.error_func = fsck_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 856428fef9..59eb8c0355 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -127,7 +127,7 @@ static int nr_threads;
static int from_stdin;
static int strict;
static int do_fsck_object;
-static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES;
+static struct fsck_options fsck_options = FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES;
static int verbose;
static const char *progress_title;
static int show_resolving_progress;
@@ -1746,7 +1746,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
usage(index_pack_usage);
disable_replace_refs();
- fsck_options.walk = mark_link;
+ fsck_options.objects_options.walk = mark_link;
reset_pack_idx_option(&opts);
opts.flags |= WRITE_REV;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..c6fbeb58d4 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -15,7 +15,7 @@ static char const * const builtin_mktag_usage[] = {
};
static int option_strict = 1;
-static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
+static struct fsck_options fsck_options = FSCK_OBJECTS_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index f1c85a00ae..53cff0a91c 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -25,7 +25,7 @@ static unsigned int offset, len;
static off_t consumed_bytes;
static off_t max_input_size;
static git_hash_ctx ctx;
-static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
+static struct fsck_options fsck_options = FSCK_OBJECTS_OPTIONS_STRICT;
static struct progress *progress;
/*
@@ -239,7 +239,7 @@ static int check_object(struct object *obj, enum object_type type,
die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
die("fsck error in packed object");
- fsck_options.walk = check_object;
+ fsck_options.objects_options.walk = check_object;
if (fsck_walk(obj, NULL, &fsck_options))
die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
write_cached_object(obj, obj_buf);
diff --git a/fetch-pack.c b/fetch-pack.c
index 42f48fbc31..adce10fb4c 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -46,7 +46,7 @@ static int server_supports_filtering;
static int advertise_sid;
static struct shallow_lock shallow_lock;
static const char *alternate_shallow_file;
-static struct fsck_options fsck_options = FSCK_OPTIONS_MISSING_GITMODULES;
+static struct fsck_options fsck_options = FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES;
static struct strbuf fsck_msg_types = STRBUF_INIT;
static struct string_list uri_protocols = STRING_LIST_INIT_DUP;
@@ -1222,7 +1222,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
} else
alternate_shallow_file = NULL;
if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought,
- &fsck_options.gitmodules_found))
+ &fsck_options.objects_options.gitmodules_found))
die(_("git fetch-pack: fetch failed."));
if (fsck_finish(&fsck_options))
die("fsck failed");
@@ -1782,7 +1782,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
if (get_pack(args, fd, pack_lockfiles,
packfile_uris.nr ? &index_pack_args : NULL,
- sought, nr_sought, &fsck_options.gitmodules_found))
+ sought, nr_sought,
+ &fsck_options.objects_options.gitmodules_found))
die(_("git fetch-pack: fetch failed."));
do_check_stateless_delimiter(args->stateless_rpc, &reader);
@@ -1825,7 +1826,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
packname[the_hash_algo->hexsz] = '\0';
- parse_gitmodules_oids(cmd.out, &fsck_options.gitmodules_found);
+ parse_gitmodules_oids(cmd.out,
+ &fsck_options.objects_options.gitmodules_found);
close(cmd.out);
diff --git a/fsck.c b/fsck.c
index e193930ae7..e9848f2678 100644
--- a/fsck.c
+++ b/fsck.c
@@ -259,20 +259,20 @@ static int report(struct fsck_options *options,
void fsck_enable_object_names(struct fsck_options *options)
{
- if (!options->object_names)
- options->object_names = kh_init_oid_map();
+ if (!options->objects_options.object_names)
+ options->objects_options.object_names = kh_init_oid_map();
}
const char *fsck_get_object_name(struct fsck_options *options,
const struct object_id *oid)
{
khiter_t pos;
- if (!options->object_names)
+ if (!options->objects_options.object_names)
return NULL;
- pos = kh_get_oid_map(options->object_names, *oid);
- if (pos >= kh_end(options->object_names))
+ pos = kh_get_oid_map(options->objects_options.object_names, *oid);
+ if (pos >= kh_end(options->objects_options.object_names))
return NULL;
- return kh_value(options->object_names, pos);
+ return kh_value(options->objects_options.object_names, pos);
}
void fsck_put_object_name(struct fsck_options *options,
@@ -284,15 +284,16 @@ void fsck_put_object_name(struct fsck_options *options,
khiter_t pos;
int hashret;
- if (!options->object_names)
+ if (!options->objects_options.object_names)
return;
- pos = kh_put_oid_map(options->object_names, *oid, &hashret);
+ pos = kh_put_oid_map(options->objects_options.object_names,
+ *oid, &hashret);
if (!hashret)
return;
va_start(ap, fmt);
strbuf_vaddf(&buf, fmt, ap);
- kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL);
+ kh_value(options->objects_options.object_names, pos) = strbuf_detach(&buf, NULL);
va_end(ap);
}
@@ -342,14 +343,14 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op
if (name && obj)
fsck_put_object_name(options, &entry.oid, "%s%s/",
name, entry.path);
- result = options->walk(obj, OBJ_TREE, data, options);
+ result = options->objects_options.walk(obj, OBJ_TREE, data, options);
}
else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
obj = (struct object *)lookup_blob(the_repository, &entry.oid);
if (name && obj)
fsck_put_object_name(options, &entry.oid, "%s%s",
name, entry.path);
- result = options->walk(obj, OBJ_BLOB, data, options);
+ result = options->objects_options.walk(obj, OBJ_BLOB, data, options);
}
else {
result = error("in tree %s: entry %s has bad mode %.6o",
@@ -380,7 +381,7 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
fsck_put_object_name(options, get_commit_tree_oid(commit),
"%s:", name);
- result = options->walk((struct object *) repo_get_commit_tree(the_repository, commit),
+ result = options->objects_options.walk((struct object *) repo_get_commit_tree(the_repository, commit),
OBJ_TREE, data, options);
if (result < 0)
return result;
@@ -423,7 +424,7 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
else
fsck_put_object_name(options, oid, "%s^", name);
}
- result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
+ result = options->objects_options.walk((struct object *)parents->item, OBJ_COMMIT, data, options);
if (result < 0)
return result;
if (!res)
@@ -441,7 +442,7 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio
return -1;
if (name)
fsck_put_object_name(options, &tag->tagged->oid, "%s", name);
- return options->walk(tag->tagged, OBJ_ANY, data, options);
+ return options->objects_options.walk(tag->tagged, OBJ_ANY, data, options);
}
int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
@@ -598,6 +599,7 @@ static int fsck_tree(const struct object_id *tree_oid,
unsigned o_mode;
const char *o_name;
struct name_stack df_dup_candidates = { NULL };
+ struct fsck_objects_options *objects_options = &options->objects_options;
if (init_tree_desc_gently(&desc, tree_oid, buffer, size,
TREE_DESC_RAW_MODES)) {
@@ -628,7 +630,7 @@ static int fsck_tree(const struct object_id *tree_oid,
if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
if (!S_ISLNK(mode))
- oidset_insert(&options->gitmodules_found,
+ oidset_insert(&objects_options->gitmodules_found,
entry_oid);
else
retval += report(options,
@@ -639,7 +641,7 @@ static int fsck_tree(const struct object_id *tree_oid,
if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
if (!S_ISLNK(mode))
- oidset_insert(&options->gitattributes_found,
+ oidset_insert(&objects_options->gitattributes_found,
entry_oid);
else
retval += report(options, tree_oid, OBJ_TREE,
@@ -666,7 +668,7 @@ static int fsck_tree(const struct object_id *tree_oid,
has_dotgit |= is_ntfs_dotgit(backslash);
if (is_ntfs_dotgitmodules(backslash)) {
if (!S_ISLNK(mode))
- oidset_insert(&options->gitmodules_found,
+ oidset_insert(&objects_options->gitmodules_found,
entry_oid);
else
retval += report(options, tree_oid, OBJ_TREE,
@@ -1107,11 +1109,11 @@ static int fsck_blob(const struct object_id *oid, const char *buf,
if (object_on_skiplist(options, oid))
return 0;
- if (oidset_contains(&options->gitmodules_found, oid)) {
+ if (oidset_contains(&options->objects_options.gitmodules_found, oid)) {
struct config_options config_opts = { 0 };
struct fsck_gitmodules_data data;
- oidset_insert(&options->gitmodules_done, oid);
+ oidset_insert(&options->objects_options.gitmodules_done, oid);
if (!buf) {
/*
@@ -1137,10 +1139,10 @@ static int fsck_blob(const struct object_id *oid, const char *buf,
ret |= data.ret;
}
- if (oidset_contains(&options->gitattributes_found, oid)) {
+ if (oidset_contains(&options->objects_options.gitattributes_found, oid)) {
const char *ptr;
- oidset_insert(&options->gitattributes_done, oid);
+ oidset_insert(&options->objects_options.gitattributes_done, oid);
if (!buf || size > ATTR_MAX_FILE_SIZE) {
/*
@@ -1255,12 +1257,15 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
int fsck_finish(struct fsck_options *options)
{
+ struct fsck_objects_options *objects_options = &options->objects_options;
int ret = 0;
- ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,
+ ret |= fsck_blobs(&objects_options->gitmodules_found,
+ &objects_options->gitmodules_done,
FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,
options, ".gitmodules");
- ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,
+ ret |= fsck_blobs(&objects_options->gitattributes_found,
+ &objects_options->gitattributes_done,
FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,
options, ".gitattributes");
diff --git a/fsck.h b/fsck.h
index 6085a384f6..37b6f6676f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -131,12 +131,8 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_id msg_id,
const char *message);
-struct fsck_options {
+struct fsck_objects_options {
fsck_walk_func walk;
- fsck_error error_func;
- unsigned strict:1;
- enum fsck_msg_type *msg_type;
- struct oidset skiplist;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -144,29 +140,43 @@ struct fsck_options {
kh_oid_map_t *object_names;
};
-#define FSCK_OPTIONS_DEFAULT { \
+struct fsck_options {
+ unsigned strict:1;
+ enum fsck_msg_type *msg_type;
+ struct oidset skiplist;
+ fsck_error error_func;
+ struct fsck_objects_options objects_options;
+};
+
+#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
.skiplist = OIDSET_INIT, \
- .gitmodules_found = OIDSET_INIT, \
- .gitmodules_done = OIDSET_INIT, \
- .gitattributes_found = OIDSET_INIT, \
- .gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_error_function, \
+ .objects_options = { \
+ .gitmodules_found = OIDSET_INIT, \
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
+ .gitattributes_done = OIDSET_INIT, \
+ } \
}
-#define FSCK_OPTIONS_STRICT { \
+#define FSCK_OBJECTS_OPTIONS_STRICT { \
.strict = 1, \
- .gitmodules_found = OIDSET_INIT, \
- .gitmodules_done = OIDSET_INIT, \
- .gitattributes_found = OIDSET_INIT, \
- .gitattributes_done = OIDSET_INIT, \
.error_func = fsck_error_function, \
+ .objects_options = { \
+ .gitmodules_found = OIDSET_INIT, \
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
+ .gitattributes_done = OIDSET_INIT, \
+ } \
}
-#define FSCK_OPTIONS_MISSING_GITMODULES { \
+#define FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
- .gitmodules_found = OIDSET_INIT, \
- .gitmodules_done = OIDSET_INIT, \
- .gitattributes_found = OIDSET_INIT, \
- .gitattributes_done = OIDSET_INIT, \
.error_func = fsck_error_cb_print_missing_gitmodules, \
+ .objects_options = { \
+ .gitmodules_found = OIDSET_INIT, \
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
+ .gitattributes_done = OIDSET_INIT, \
+ } \
}
/* descend in all linked child objects
diff --git a/object-file.c b/object-file.c
index d3cf4b8b2e..4e10dd9804 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2507,7 +2507,7 @@ static int index_mem(struct index_state *istate,
}
}
if (flags & HASH_FORMAT_CHECK) {
- struct fsck_options opts = FSCK_OPTIONS_DEFAULT;
+ struct fsck_options opts = FSCK_OBJECTS_OPTIONS_DEFAULT;
opts.strict = 1;
opts.error_func = hash_format_check_report;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 02/11] fsck: rename "skiplist" to "oid_skiplist"
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
2024-07-01 15:18 ` [PATCH v6 01/11] fsck: add "fsck_objects_options" to hold objects-related options shejialuo
@ 2024-07-01 15:19 ` shejialuo
2024-07-01 15:19 ` [GSoC][PATCH v6 03/11] fsck: add "fsck_refs_options" into "fsck_options" shejialuo
` (10 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:19 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Because we are introducing ref consistency checks, the existing "skiplist",
a common option which is set up while handling user configs, becomes
ambiguous. To avoid this ambiguity, rename "skiplist" to "oid_skiplist".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index e9848f2678..04d0aa1766 100644
--- a/fsck.c
+++ b/fsck.c
@@ -203,7 +203,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1);
+ oidset_parse_file(&options->oid_skiplist, buf + equal + 1);
buf += len + 1;
continue;
}
@@ -220,7 +220,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 37b6f6676f..0c581ef082 100644
--- a/fsck.h
+++ b/fsck.h
@@ -143,13 +143,13 @@ struct fsck_objects_options {
struct fsck_options {
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset oid_skiplist;
fsck_error error_func;
struct fsck_objects_options objects_options;
};
#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .oid_skiplist = OIDSET_INIT, \
.error_func = fsck_error_function, \
.objects_options = { \
.gitmodules_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 03/11] fsck: add "fsck_refs_options" into "fsck_options"
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
2024-07-01 15:18 ` [PATCH v6 01/11] fsck: add "fsck_objects_options" to hold objects-related options shejialuo
2024-07-01 15:19 ` [GSoC][PATCH v6 02/11] fsck: rename "skiplist" to "oid_skiplist" shejialuo
@ 2024-07-01 15:19 ` shejialuo
2024-07-01 15:19 ` [GSoC][PATCH v6 04/11] fsck: add a unified interface for reporting fsck messages shejialuo
` (9 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:19 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce "fsck_refs_options" to represent the refs-related options.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fsck.h b/fsck.h
index 0c581ef082..ff7281b410 100644
--- a/fsck.h
+++ b/fsck.h
@@ -131,6 +131,10 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_id msg_id,
const char *message);
+struct fsck_refs_options {
+ unsigned verbose:1;
+};
+
struct fsck_objects_options {
fsck_walk_func walk;
struct oidset gitmodules_found;
@@ -146,6 +150,7 @@ struct fsck_options {
struct oidset oid_skiplist;
fsck_error error_func;
struct fsck_objects_options objects_options;
+ struct fsck_refs_options refs_options;
};
#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 04/11] fsck: add a unified interface for reporting fsck messages
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-07-01 15:19 ` [GSoC][PATCH v6 03/11] fsck: add "fsck_refs_options" into "fsck_options" shejialuo
@ 2024-07-01 15:19 ` shejialuo
2024-07-01 15:19 ` [GSoC][PATCH v6 05/11] fsck: add "fsck_refs_options" initialization macros shejialuo
` (8 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:19 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
The static function "report" in "fsck.c" reports problems related to the
object database and cannot be reused for refs.
In order to provide a unified interface which can report problems for
either objects or refs, create a new function "vfsck_report" that follows
the "report" prototype but adds a "checked_ref_name" parameter. Also,
instead of using "...", take a "va_list" to allow more flexibility.
The "vfsck_report" function uses the "error_func" callback registered in
the "fsck_options" struct to report customized messages. Change the
"error_func" prototype to align with "vfsck_report".
Change the "report" function to use "vfsck_report" to report
objects-related messages. Add a new function called "fsck_refs_report"
which uses "vfsck_report" to report refs-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 15 ++++-----
builtin/mktag.c | 1 +
fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
fsck.h | 40 +++++++++++++++---------
object-file.c | 11 ++++---
5 files changed, 101 insertions(+), 47 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 13b64f723f..cdf2218dfd 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.objects_options.walk = mark_object;
fsck_obj_options.objects_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index c6fbeb58d4..d8e4b91134 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OBJECTS_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 04d0aa1766..31ae8d57ce 100644
--- a/fsck.c
+++ b/fsck.c
@@ -223,12 +223,18 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int vfsck_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -247,9 +253,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, oid, object_type, checked_ref_name,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -257,6 +263,36 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, object_type, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
+
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, OBJ_NONE,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->objects_options.object_names)
@@ -1199,12 +1235,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1305,16 +1342,18 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index ff7281b410..e611b3b65b 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,22 +114,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_refs_options {
unsigned verbose:1;
@@ -155,7 +160,7 @@ struct fsck_options {
#define FSCK_OBJECTS_OPTIONS_DEFAULT { \
.oid_skiplist = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
.objects_options = { \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
@@ -165,7 +170,7 @@ struct fsck_options {
}
#define FSCK_OBJECTS_OPTIONS_STRICT { \
.strict = 1, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
.objects_options = { \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
@@ -175,7 +180,7 @@ struct fsck_options {
}
#define FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
.objects_options = { \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
@@ -224,6 +229,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 4e10dd9804..a681087625 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2473,11 +2473,12 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *ref_checked_name UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 05/11] fsck: add "fsck_refs_options" initialization macros
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-07-01 15:19 ` [GSoC][PATCH v6 04/11] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-01 15:19 ` shejialuo
2024-07-01 15:20 ` [GSoC][PATCH v6 06/11] refs: set up ref consistency check infrastructure shejialuo
` (7 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:19 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Add the "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT" macros
to create the refs options easily. Add a refs-specific "error_func"
callback, "fsck_refs_error_function".
"fsck_refs_error_function" uses the "oid" parameter: when the caller
passes an oid, it uses "oid_to_hex" to get the corresponding hex value
to report to the user.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 22 ++++++++++++++++++++++
fsck.h | 14 ++++++++++++++
2 files changed, 36 insertions(+)
diff --git a/fsck.c b/fsck.c
index 31ae8d57ce..ea00fb4790 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1251,6 +1251,28 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ static struct strbuf sb = STRBUF_INIT;
+
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN) {
+ warning("%s: %s", sb.buf, message);
+ return 0;
+ }
+ error("%s: %s", sb.buf, message);
+ return 1;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index e611b3b65b..887fe2969f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,6 +135,13 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_refs_options {
unsigned verbose:1;
@@ -188,6 +195,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
} \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 06/11] refs: set up ref consistency check infrastructure
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-07-01 15:19 ` [GSoC][PATCH v6 05/11] fsck: add "fsck_refs_options" initialization macros shejialuo
@ 2024-07-01 15:20 ` shejialuo
2024-07-01 15:20 ` [GSoC][PATCH v6 07/11] builtin/refs: add verify subcommand shejialuo
` (6 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:20 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
The interfaces defined in `ref_storage_be` are carefully grouped by their
semantics. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To keep consistent with git-fsck(1), add a new interface named "fsck_fn"
to the end of "ref_storage_be". Its semantics do not fit into any of the
five categories above, so explicitly add a blank line to set it apart
from the others.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index dd0d9c360f..5f17de38e2 100644
--- a/refs.c
+++ b/refs.c
@@ -316,6 +316,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index df8cbf6124..946e929af9 100644
--- a/refs.h
+++ b/refs.h
@@ -3,6 +3,7 @@
#include "commit.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct repository;
@@ -547,6 +548,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 44c5c3b201..817813e723 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3406,6 +3406,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3432,5 +3443,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index c4c1e36aa2..ad3e8fb1d1 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1733,6 +1733,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1760,4 +1766,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 7d872a32ac..7f07de8d45 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2292,6 +2292,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2319,4 +2325,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 07/11] builtin/refs: add verify subcommand
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-07-01 15:20 ` [GSoC][PATCH v6 06/11] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-01 15:20 ` shejialuo
2024-07-01 15:21 ` [GSoC][PATCH v6 08/11] builtin/fsck: add `git-refs verify` child process shejialuo
` (5 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:20 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter checking: every WARN severity in the `Fsck Messages`
+ is treated as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..34371573f7 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret = 0;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
+ fsck_refs_options.refs_options.verbose = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "oid_skiplist"
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.oid_skiplist);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 08/11] builtin/fsck: add `git-refs verify` child process
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-07-01 15:20 ` [GSoC][PATCH v6 07/11] builtin/refs: add verify subcommand shejialuo
@ 2024-07-01 15:21 ` shejialuo
2024-07-01 15:21 ` [GSoC][PATCH v6 09/11] files-backend: add unified interface for refs scanning shejialuo
` (4 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:21 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index cdf2218dfd..aea635ea00 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -897,6 +897,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1066,6 +1081,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 09/11] files-backend: add unified interface for refs scanning
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-07-01 15:21 ` [GSoC][PATCH v6 08/11] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-01 15:21 ` shejialuo
2024-07-01 15:21 ` [GSoC][PATCH v6 10/11] fsck: add ref name check for files backend shejialuo
` (3 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:21 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 817813e723..744ee5c5db 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -4,6 +4,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3406,6 +3407,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->refs_options.verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->refs_options.verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3413,7 +3486,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 10/11] fsck: add ref name check for files backend
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-07-01 15:21 ` [GSoC][PATCH v6 09/11] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-01 15:21 ` shejialuo
2024-07-01 15:22 ` [GSoC][PATCH v6 11/11] fsck: add ref content " shejialuo
` (2 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:21 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
git-fsck(1) only checks references implicitly; it does not fully check
refs with badly formatted names, such as a standalone "@" or a name ending
with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to explicitly
check the ref name, and add a new unit test to verify the functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index 887fe2969f..fb035d5457 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 744ee5c5db..3badec62ce 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3417,6 +3417,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3468,6 +3487,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v6 11/11] fsck: add ref content check for files backend
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (9 preceding siblings ...)
2024-07-01 15:21 ` [GSoC][PATCH v6 10/11] fsck: add ref name check for files backend shejialuo
@ 2024-07-01 15:22 ` shejialuo
2024-07-02 10:33 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup Karthik Nayak
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-01 15:22 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert a
relative path to an absolute path. Then use "skip_prefix" to make it align
with the symref "referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path is a wrong type in filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has bad content.
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref has trailing content.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref has trailing content.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index fb035d5457..e917b221f6 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 5f17de38e2..4e94f92997 100644
--- a/refs.c
+++ b/refs.c
@@ -1758,7 +1758,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 3badec62ce..21a9a80d39 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,4 +1,5 @@
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -551,7 +552,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -587,7 +588,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -609,6 +610,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3436,6 +3441,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For a symbolic ref, "pointee_name"
+ * would be the content after "ref:". For a symbolic link, "pointee_name" would
+ * be the relative path against "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs can have trailing garbage, which is
+ * reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3488,6 +3628,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. For a regular ref, a non-NULL "trailing" is set to point at
+ * the first character after the object ID; it is left untouched for a symref.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v6 00/11] ref consistency check infra setup
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (10 preceding siblings ...)
2024-07-01 15:22 ` [GSoC][PATCH v6 11/11] fsck: add ref content " shejialuo
@ 2024-07-02 10:33 ` Karthik Nayak
2024-07-02 12:15 ` shejialuo
2024-07-02 15:39 ` Junio C Hamano
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
12 siblings, 2 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-07-02 10:33 UTC (permalink / raw)
To: shejialuo, git; +Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine
[-- Attachment #1: Type: text/plain, Size: 1680 bytes --]
Hello,
shejialuo <shejialuo@gmail.com> writes:
> Hi All:
>
> This version follows the Junio's advice. Instead of creating the
> following data structure:
>
> struct fsck_options {
> enum fsck_type {
> FSCK_OBJECTS,
> FSCK_REFS,
> ...
> } t;
> union {
> struct fsck_objects_options objects;
> struct fsck_refs_options refs;
> } u;
> };
>
> I simply use the combination idea where "fsck_options" will incorporate
> "fsck_objects_options" and "fsck_refs_options". Karthik has told me that
> I should balance the job I should does and the extensibility for future.
> So I use the most clear way to do this. Also Junio has said:
>
If I understood Junio's comments correctly, he was drawing out the point
about if we even need the separation of options for refs. Since the only
option we're adding is a verbose:
struct fsck_refs_options {
unsigned verbose:1;
};
wouldn't it be better if we simply amended `fsck_options` as so:
diff --git a/fsck.h b/fsck.h
index 6085a384f6..ea97f48acc 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,6 +135,7 @@ struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose_refs:1;
enum fsck_msg_type *msg_type;
struct oidset skiplist;
struct oidset gitmodules_found;
Your approach seems to take a different path though, where we create a
new route of creating two new structs, one for refs and another for
objects and adding both to fsck_objects. If we're doing this, wouldn't
it be better to use the enum+union idea, like Junio mentioned? That way
we would have clarity around which type it represents.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v6 00/11] ref consistency check infra setup
2024-07-02 10:33 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup Karthik Nayak
@ 2024-07-02 12:15 ` shejialuo
2024-07-02 15:39 ` Junio C Hamano
1 sibling, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-02 12:15 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine
On Tue, Jul 02, 2024 at 10:33:36AM +0000, Karthik Nayak wrote:
> Hello,
>
> shejialuo <shejialuo@gmail.com> writes:
>
> > Hi All:
> >
> > This version follows the Junio's advice. Instead of creating the
> > following data structure:
> >
> > struct fsck_options {
> > enum fsck_type {
> > FSCK_OBJECTS,
> > FSCK_REFS,
> > ...
> > } t;
> > union {
> > struct fsck_objects_options objects;
> > struct fsck_refs_options refs;
> > } u;
> > };
> >
> > I simply use the combination idea where "fsck_options" will incorporate
> > "fsck_objects_options" and "fsck_refs_options". Karthik has told me that
> > I should balance the job I should does and the extensibility for future.
> > So I use the most clear way to do this. Also Junio has said:
> >
>
> If I understood Junio's comments correctly, he was drawing out the point
> about if we even need the separation of options for refs. Since the only
> option we're adding is a verbose:
>
> struct fsck_refs_options {
> unsigned verbose:1;
> };
>
> wouldn't it be better if we simply amended `fsck_options` as so:
>
> diff --git a/fsck.h b/fsck.h
> index 6085a384f6..ea97f48acc 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -135,6 +135,7 @@ struct fsck_options {
> fsck_walk_func walk;
> fsck_error error_func;
> unsigned strict:1;
> + unsigned verbose_refs:1;
> enum fsck_msg_type *msg_type;
> struct oidset skiplist;
> struct oidset gitmodules_found;
>
> Your approach seems to take a different path though, where we create a
> new route of creating two new structs, one for refs and another for
> objects and adding both to fsck_objects. If we're doing this, wouldn't
> it be better to use the enum+union idea, like Junio mentioned? That way
> we would have clarity around which type it represents.
>
I agree. Let's give up breaking the structs. I will send a new version
immediately.
Thanks.
> [snip]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v6 00/11] ref consistency check infra setup
2024-07-02 10:33 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup Karthik Nayak
2024-07-02 12:15 ` shejialuo
@ 2024-07-02 15:39 ` Junio C Hamano
1 sibling, 0 replies; 282+ messages in thread
From: Junio C Hamano @ 2024-07-02 15:39 UTC (permalink / raw)
To: Karthik Nayak; +Cc: shejialuo, git, Patrick Steinhardt, Eric Sunshine
Karthik Nayak <karthik.188@gmail.com> writes:
> If I understood Junio's comments correctly, he was drawing out the point
> about if we even need the separation of options for refs. Since the only
> option we're adding is a verbose:
> ...
> Your approach seems to take a different path though, where we create a
> new route of creating two new structs, one for refs and another for
> objects and adding both to fsck_objects. If we're doing this, wouldn't
> it be better to use the enum+union idea, like Junio mentioned? That way
> we would have clarity around which type it represents.
Yup. If we are going to over-engineer this, enum+union would be a
reasonable way to do so, but we should ask if we need to split (and
more importantly, if we know the problem space well enough to make
the right split) in the first place. Just like premature optimization
is bad, premature factoring and over-modularization is bad.
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 0/9] ref consistency check infra setup
2024-07-01 15:13 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup shejialuo
` (11 preceding siblings ...)
2024-07-02 10:33 ` [GSoC][PATCH v6 00/11] ref consistency check infra setup Karthik Nayak
@ 2024-07-03 13:53 ` shejialuo
2024-07-03 13:56 ` [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist" shejialuo
` (19 more replies)
12 siblings, 20 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:53 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Hi All:
We have come to an agreement that there is no need to add complexity in
"fsck_options". So this version just adds a new field into "fsck_options"
for refs. The other parts are just the same as the previous version.
CI: https://github.com/shejialuo/git/pull/7
This patch is made upon the latest commit:
06e570c0df (Sync with 'maint', 2024-07-02)
> Just like premature optimization is bad, premature factoring and
> over-modularization is bad.
I have learned a lot from yesterday's email. I had always thought we should
modularize the code, which caused a lot of trouble in this series. Note that
the range-diff below may contain some commits from others, because this
series is built upon the latest commit. Sorry for the inconvenience.
Thanks for every reviewer.
shejialuo (9):
fsck: rename "skiplist" to "oid_skiplist"
fsck: add a unified interface for reporting fsck messages
fsck: add refs-related options and error report function
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 32 ++++-
builtin/mktag.c | 1 +
builtin/refs.c | 44 ++++++
fsck.c | 107 +++++++++++---
fsck.h | 63 ++++++---
object-file.c | 11 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 255 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
16 files changed, 745 insertions(+), 57 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v6:
1: d32ae41a4d < -: ---------- fsck: add "fsck_objects_options" to hold objects-related options
2: 589a7a6fac < -: ---------- fsck: rename "skiplist" to "oid_skiplist"
3: bab97d7c82 < -: ---------- fsck: add "fsck_refs_options" into "fsck_options"
-: ---------- > 1: f5fcf36132 fsck: rename "skiplist" to "oid_skiplist"
4: 276da52a6b ! 2: c81b5b6fba fsck: add a unified interface for reporting fsck messages
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
case FSCK_WARN:
@@ builtin/fsck.c: int cmd_fsck(int argc, const char **argv, const char *prefix)
- fsck_walk_options.objects_options.walk = mark_object;
- fsck_obj_options.objects_options.walk = mark_used;
+ fsck_walk_options.walk = mark_object;
+ fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
@@ builtin/fsck.c: int cmd_fsck(int argc, const char **argv, const char *prefix)
## builtin/mktag.c ##
-@@ builtin/mktag.c: static struct fsck_options fsck_options = FSCK_OBJECTS_OPTIONS_STRICT;
+@@ builtin/mktag.c: static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
@@ fsck.c: static int report(struct fsck_options *options,
+
void fsck_enable_object_names(struct fsck_options *options)
{
- if (!options->objects_options.object_names)
+ if (!options->object_names)
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
@@ fsck.h: int is_valid_msg_type(const char *msg_id, const char *msg_type);
+ enum fsck_msg_id msg_id,
+ const char *message);
- struct fsck_refs_options {
- unsigned verbose:1;
+ struct fsck_options {
+ fsck_walk_func walk;
@@ fsck.h: struct fsck_options {
+ };
- #define FSCK_OBJECTS_OPTIONS_DEFAULT { \
- .oid_skiplist = OIDSET_INIT, \
-- .error_func = fsck_error_function, \
-+ .error_func = fsck_objects_error_function, \
- .objects_options = { \
- .gitmodules_found = OIDSET_INIT, \
- .gitmodules_done = OIDSET_INIT, \
-@@ fsck.h: struct fsck_options {
+ #define FSCK_OPTIONS_DEFAULT { \
+- .skiplist = OIDSET_INIT, \
++ .oid_skiplist = OIDSET_INIT, \
+ .gitmodules_found = OIDSET_INIT, \
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
+ .gitattributes_done = OIDSET_INIT, \
+- .error_func = fsck_error_function \
++ .error_func = fsck_objects_error_function \
}
- #define FSCK_OBJECTS_OPTIONS_STRICT { \
+ #define FSCK_OPTIONS_STRICT { \
.strict = 1, \
+@@ fsck.h: struct fsck_options {
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
+ .gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
- .objects_options = { \
- .gitmodules_found = OIDSET_INIT, \
- .gitmodules_done = OIDSET_INIT, \
-@@ fsck.h: struct fsck_options {
}
- #define FSCK_OBJECTS_OPTIONS_MISSING_GITMODULES { \
+ #define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
+@@ fsck.h: struct fsck_options {
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
+ .gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
- .objects_options = { \
- .gitmodules_found = OIDSET_INIT, \
- .gitmodules_done = OIDSET_INIT, \
+ }
+
+ /* descend in all linked child objects
@@ fsck.h: int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
5: e93940c50c ! 3: 53156dc847 fsck: add "fsck_refs_options" initialization macros
@@ Metadata
Author: shejialuo <shejialuo@gmail.com>
## Commit message ##
- fsck: add "fsck_refs_options" initialization macros
+ fsck: add refs-related options and error report function
- Add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT" macros to
- create the refs options easily. Add refs-specific "error_func" callback
- "fsck_refs_error_function".
+ Add refs-related options to the "fsck_options", create refs-specific
+ "error_func" callback "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
- passed the oid, it will use "oid_to_hex" to get the corresponding hex
- value to report to the user.
+ passes the oid, it will use "oid_to_hex" to get the corresponding hex
+ value to report to the caller.
+
+ Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
+ macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *
+ enum fsck_msg_id msg_id,
+ const char *message);
- struct fsck_refs_options {
- unsigned verbose:1;
+ struct fsck_options {
+ fsck_walk_func walk;
+ fsck_error error_func;
+ unsigned strict:1;
++ unsigned verbose_refs:1;
+ enum fsck_msg_type *msg_type;
+ struct oidset oid_skiplist;
+ struct oidset gitmodules_found;
@@ fsck.h: struct fsck_options {
- .gitattributes_done = OIDSET_INIT, \
- } \
+ .gitattributes_done = OIDSET_INIT, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
6: 85aa953f6d ! 4: 358f4a1be9 refs: set up ref consistency check infrastructure
@@ refs.c: int check_refname_format(const char *refname, int flags)
## refs.h ##
@@
-
#include "commit.h"
+ #include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
- struct repository;
+ struct strbuf;
@@ refs.h: int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
7: ce7adc7372 ! 5: e632859df6 builtin/refs: add verify subcommand
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ usage(_("too many arguments"));
+
+ if (verbose)
-+ fsck_refs_options.refs_options.verbose = 1;
++ fsck_refs_options.verbose_refs = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
8: 035eafe10b = 6: 5d7a6261ae builtin/fsck: add `git-refs verify` child process
9: 9398bf3f0d ! 7: 33311af1b1 files-backend: add unified interface for refs scanning
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
-+ if (o->refs_options.verbose)
++ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ NULL
+ };
+
-+ if (o->refs_options.verbose)
++ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
10: 046773e35c = 8: 02bd3ac952 fsck: add ref name check for files backend
11: c9b9599589 ! 9: 94d123471d fsck: add ref content check for files backend
@@ refs.c: static int refs_read_special_head(struct ref_store *ref_store,
## refs/files-backend.c ##
@@
+ #define USE_THE_REPOSITORY_VARIABLE
+
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist"
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
@ 2024-07-03 13:56 ` shejialuo
2024-07-05 22:07 ` Justin Tobler
2024-07-03 13:56 ` [GSoC][PATCH v7 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
` (18 subsequent siblings)
19 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:56 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Because we introduce ref consistency check. The original "skiplist" is a
common option which is set up during handling user configs. To avoid
causing ambiguity, rename "skiplist" to "oid_skiplist".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..1960bfeba9 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->oid_skiplist, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..1ee3dd85ba 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset oid_skiplist;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist"
2024-07-03 13:56 ` [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist" shejialuo
@ 2024-07-05 22:07 ` Justin Tobler
2024-07-08 12:06 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Justin Tobler @ 2024-07-05 22:07 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On 24/07/03 09:56PM, shejialuo wrote:
> Because we introduce ref consistency check. The original "skiplist" is a
> common option which is set up during handling user configs. To avoid
> causing ambiguity, rename "skiplist" to "oid_skiplist".
I think the commit message could be expanded on to provide additional
context and reasoning for the change. From reading this alone, it sounds
like we have already introduced the ref consistency check and are now
looking to rename a variable. When really this is a preparatory change.
Without reading ahead, I'm also left wondering why this name change
reduces ambiguity.
-Justin
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist"
2024-07-05 22:07 ` Justin Tobler
@ 2024-07-08 12:06 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 12:06 UTC (permalink / raw)
To: Justin Tobler
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On Fri, Jul 05, 2024 at 05:07:29PM -0500, Justin Tobler wrote:
> On 24/07/03 09:56PM, shejialuo wrote:
> > Because we introduce ref consistency check. The original "skiplist" is a
> > common option which is set up during handling user configs. To avoid
> > causing ambiguity, rename "skiplist" to "oid_skiplist".
>
> I think the commit message could be expanded on to provide additional
> context and reasoning for the change. From reading this alone, it sounds
> like we have already introduced the ref consistency check and are now
> looking to rename a variable. When really this is a preparatory change.
> Without reading ahead, I'm also left wondering why this name change
> reduces ambiguity.
>
Yes, I will add more information to show the context. "skiplist" is
initialized using "git_fsck_config" to parse the user-specific config.
In this series, we introduce ref-specific check. "skiplist" is a general
name which may make the caller think "skiplist" is related to both the
refs and objects.
For later implementation, we may also introduce a "skiplist" for refs, but
for refs, we care about the name, not the "oidset". I will add more
information to explain how this change reduces ambiguity.
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
2024-07-03 13:56 ` [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist" shejialuo
@ 2024-07-03 13:56 ` shejialuo
2024-07-05 22:43 ` Justin Tobler
2024-07-03 13:57 ` [GSoC][PATCH v7 3/9] fsck: add refs-related options and error report function shejialuo
` (17 subsequent siblings)
19 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:56 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
The static function "report" provided by "fsck.c" aims at reporting the
problems related to object database which cannot be reused for refs.
In order to provide a unified interface which can report either objects
or refs, create a new function "vfsck_report" by adding
"checked_ref_name" parameter following the "report" prototype. However,
instead of using "...", provide "va_list" to allow more flexibility.
The "vfsck_report" function will use "error_func" registered in
"fsck_options" function to report customized messages. Change
"error_func" prototype to align with the "vfsck_report".
Change "report" function to make it use "vfsck_report" to report
objects-related messages. Add a new function called "fsck_refs_report"
to use "vfsck_report" to report refs-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 15 ++++-----
builtin/mktag.c | 1 +
fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
fsck.h | 42 ++++++++++++++++---------
object-file.c | 11 ++++---
5 files changed, 102 insertions(+), 48 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..de34538c4f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..42f945c584 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 1960bfeba9..7182ce8e80 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int vfsck_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, oid, object_type, checked_ref_name,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -260,6 +266,36 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, object_type, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
+
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, OBJ_NONE,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1200,12 +1236,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1303,16 +1340,18 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 1ee3dd85ba..f703dfb5e8 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,22 +114,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -145,12 +150,12 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .oid_skiplist = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -158,7 +163,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -166,7 +171,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
@@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 065103be3e..d2c6427935 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *ref_checked_name UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v7 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-03 13:56 ` [GSoC][PATCH v7 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-05 22:43 ` Justin Tobler
2024-07-08 12:12 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Justin Tobler @ 2024-07-05 22:43 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On 24/07/03 09:56PM, shejialuo wrote:
> The static function "report" provided by "fsck.c" aims at reporting the
> problems related to object database which cannot be reused for refs.
> In order to provide a unified interface which can report either objects
> or refs, create a new function "vfsck_report" by adding
> "checked_ref_name" parameter following the "report" prototype. However,
> instead of using "...", provide "va_list" to allow more flexibility.
>
> The "vfsck_report" function will use "error_func" registered in
> "fsck_options" function to report customized messages. Change
> "error_func" prototype to align with the "vfsck_report".
>
> Change "report" function to make it use "vfsck_report" to report
> objects-related messages. Add a new function called "fsck_refs_report"
> to use "vfsck_report" to report refs-related messages.
To restate in my own words, the existing "report" function checks whether
an fsck message should be reported and also formats the message. It is
currently set up to handle only object database related fsck problems.
The `fsck_error` function type is generalized to also accept a reference
name, enabling fsck problems to be reported for references.
>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> builtin/fsck.c | 15 ++++-----
> builtin/mktag.c | 1 +
> fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
> fsck.h | 42 ++++++++++++++++---------
> object-file.c | 11 ++++---
> 5 files changed, 102 insertions(+), 48 deletions(-)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index d13a226c2e..de34538c4f 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
> return -1;
> }
>
> -static int fsck_error_func(struct fsck_options *o UNUSED,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> +static int fsck_objects_error_func(struct fsck_options *o UNUSED,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name UNUSED,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> switch (msg_type) {
> case FSCK_WARN:
> @@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
>
> fsck_walk_options.walk = mark_object;
> fsck_obj_options.walk = mark_used;
> - fsck_obj_options.error_func = fsck_error_func;
> + fsck_obj_options.error_func = fsck_objects_error_func;
> if (check_strict)
> fsck_obj_options.strict = 1;
>
> diff --git a/builtin/mktag.c b/builtin/mktag.c
> index 4767f1a97e..42f945c584 100644
> --- a/builtin/mktag.c
> +++ b/builtin/mktag.c
> @@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
> static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
> const struct object_id *oid UNUSED,
> enum object_type object_type UNUSED,
> + const char *checked_ref_name UNUSED,
> enum fsck_msg_type msg_type,
> enum fsck_msg_id msg_id UNUSED,
> const char *message)
> diff --git a/fsck.c b/fsck.c
> index 1960bfeba9..7182ce8e80 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
> return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
> }
>
> -__attribute__((format (printf, 5, 6)))
> -static int report(struct fsck_options *options,
> - const struct object_id *oid, enum object_type object_type,
> - enum fsck_msg_id msg_id, const char *fmt, ...)
> +/*
> + * Provide a unified interface for either fscking refs or objects.
> + * It will get the current msg error type and call the error_func callback
> + * which is registered in the "fsck_options" struct.
> + */
> +static int vfsck_report(struct fsck_options *options,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id, const char *fmt, va_list ap)
> {
> - va_list ap;
> + va_list ap_copy;
> struct strbuf sb = STRBUF_INIT;
> enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
> int result;
> @@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
> prepare_msg_ids();
> strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
>
> - va_start(ap, fmt);
> - strbuf_vaddf(&sb, fmt, ap);
> - result = options->error_func(options, oid, object_type,
> + va_copy(ap_copy, ap);
> + strbuf_vaddf(&sb, fmt, ap_copy);
> + result = options->error_func(options, oid, object_type, checked_ref_name,
> msg_type, msg_id, sb.buf);
> strbuf_release(&sb);
> va_end(ap);
> @@ -260,6 +266,36 @@ static int report(struct fsck_options *options,
> return result;
> }
>
> +__attribute__((format (printf, 5, 6)))
> +static int report(struct fsck_options *options,
> + const struct object_id *oid, enum object_type object_type,
> + enum fsck_msg_id msg_id, const char *fmt, ...)
> +{
> + va_list ap;
> + int result;
> + va_start(ap, fmt);
> + result = vfsck_report(options, oid, object_type, NULL,
> + msg_id, fmt, ap);
> + va_end(ap);
> + return result;
> +}
> +
> +
Looks like there are some extra new lines here.
> +
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id, const char *fmt, ...)
> +{
> + va_list ap;
> + int result;
> + va_start(ap, fmt);
> + result = vfsck_report(options, oid, OBJ_NONE,
> + checked_ref_name, msg_id, fmt, ap);
Do we expect that reference related fsck problems may also be skipped
via the oid_skiplist?
> + va_end(ap);
> + return result;
> +}
> +
> void fsck_enable_object_names(struct fsck_options *options)
> {
> if (!options->object_names)
> @@ -1200,12 +1236,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
> type);
> }
>
> -int fsck_error_function(struct fsck_options *o,
> - const struct object_id *oid,
> - enum object_type object_type UNUSED,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> +int fsck_objects_error_function(struct fsck_options *o,
> + const struct object_id *oid,
> + enum object_type object_type UNUSED,
> + const char *checked_ref_name UNUSED,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> if (msg_type == FSCK_WARN) {
> warning("object %s: %s", fsck_describe_object(o, oid), message);
> @@ -1303,16 +1340,18 @@ int git_fsck_config(const char *var, const char *value,
> * Custom error callbacks that are used in more than one place.
> */
>
> -int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id,
> - const char *message)
> +int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id,
> + const char *message)
> {
> if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
> puts(oid_to_hex(oid));
> return 0;
> }
> - return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
> + return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
> + msg_type, msg_id, message);
> }
> diff --git a/fsck.h b/fsck.h
> index 1ee3dd85ba..f703dfb5e8 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -114,22 +114,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
> typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
> void *data, struct fsck_options *options);
>
> -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> +/*
> + * callback function for reporting errors when checking either objects or refs
> + */
> typedef int (*fsck_error)(struct fsck_options *o,
> const struct object_id *oid, enum object_type object_type,
> + const char *checked_ref_name,
> enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> const char *message);
>
> -int fsck_error_function(struct fsck_options *o,
> - const struct object_id *oid, enum object_type object_type,
> - enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> - const char *message);
> -int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id,
> - const char *message);
> +int fsck_objects_error_function(struct fsck_options *o,
> + const struct object_id *oid, enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> + const char *message);
> +int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id,
> + const char *message);
>
> struct fsck_options {
> fsck_walk_func walk;
> @@ -145,12 +150,12 @@ struct fsck_options {
> };
>
> #define FSCK_OPTIONS_DEFAULT { \
> - .skiplist = OIDSET_INIT, \
> + .oid_skiplist = OIDSET_INIT, \
Since we renamed skiplist -> oid_skiplist in the previous patch,
shouldn't we update this there?
> .gitmodules_found = OIDSET_INIT, \
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_function \
> + .error_func = fsck_objects_error_function \
> }
> #define FSCK_OPTIONS_STRICT { \
> .strict = 1, \
> @@ -158,7 +163,7 @@ struct fsck_options {
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_function, \
> + .error_func = fsck_objects_error_function, \
> }
> #define FSCK_OPTIONS_MISSING_GITMODULES { \
> .strict = 1, \
> @@ -166,7 +171,7 @@ struct fsck_options {
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_cb_print_missing_gitmodules, \
> + .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> }
>
> /* descend in all linked child objects
> @@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
> */
> int fsck_finish(struct fsck_options *options);
>
In my opinion it would be nice to add a comment to document
`fsck_refs_report()` here.
> +__attribute__((format (printf, 5, 6)))
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id,
> + const char *fmt, ...);
> +
> /*
> * Subsystem for storing human-readable names for each object.
> *
> diff --git a/object-file.c b/object-file.c
> index 065103be3e..d2c6427935 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
> * give more context.
> */
> static int hash_format_check_report(struct fsck_options *opts UNUSED,
> - const struct object_id *oid UNUSED,
> - enum object_type object_type UNUSED,
> - enum fsck_msg_type msg_type UNUSED,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> + const struct object_id *oid UNUSED,
> + enum object_type object_type UNUSED,
> + const char *ref_checked_name UNUSED,
> + enum fsck_msg_type msg_type UNUSED,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> error(_("object fails fsck: %s"), message);
> return 1;
> --
> 2.45.2
>
>
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v7 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-05 22:43 ` Justin Tobler
@ 2024-07-08 12:12 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 12:12 UTC (permalink / raw)
To: Justin Tobler
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On Fri, Jul 05, 2024 at 05:43:37PM -0500, Justin Tobler wrote:
> On 24/07/03 09:56PM, shejialuo wrote:
> > The static function "report" provided by "fsck.c" aims at reporting the
> > problems related to object database which cannot be reused for refs.
> > In order to provide a unified interface which can report either objects
> > or refs, create a new function "vfsck_report" by adding
> > "checked_ref_name" parameter following the "report" prototype. However,
> > instead of using "...", provide "va_list" to allow more flexibility.
> >
> > The "vfsck_report" function will use "error_func" registered in
> > "fsck_options" function to report customized messages. Change
> > "error_func" prototype to align with the "vfsck_report".
> >
> > Change "report" function to make it use "vfsck_report" to report
> > objects-related messages. Add a new function called "fsck_refs_report"
> > to use "vfsck_report" to report refs-related messages.
>
> To restate in my own words, the existing "report" function is checked if
> an fsck message should be returned and also formats the message. It is
> currently setup to only handle object database related fsck problems.
> The `fsck_error` function type is generalized to also accept a reference
> name enabling fsck problems to be reported for references.
>
Exactly, I will add more information in the commit message.
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 3/9] fsck: add refs-related options and error report function
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
2024-07-03 13:56 ` [GSoC][PATCH v7 1/9] fsck: rename "skiplist" to "oid_skiplist" shejialuo
2024-07-03 13:56 ` [GSoC][PATCH v7 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-03 13:57 ` shejialuo
2024-07-03 13:57 ` [GSoC][PATCH v7 4/9] refs: set up ref consistency check infrastructure shejialuo
` (16 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:57 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Add refs-related options to "fsck_options" and create a refs-specific
"error_func" callback, "fsck_refs_error_function".
"fsck_refs_error_function" makes use of the "oid" parameter: when the
caller passes a non-NULL oid, the callback uses "oid_to_hex" to append
the corresponding hex value to the reported message.
Finally, add the "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to make it easy to initialize refs options.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 22 ++++++++++++++++++++++
fsck.h | 15 +++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/fsck.c b/fsck.c
index 7182ce8e80..d1dcbdcac2 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1252,6 +1252,28 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ static struct strbuf sb = STRBUF_INIT;
+
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN) {
+ warning("%s: %s", sb.buf, message);
+ return 0;
+ }
+ error("%s: %s", sb.buf, message);
+ return 1;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index f703dfb5e8..246055c0f9 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose_refs:1;
enum fsck_msg_type *msg_type;
struct oidset oid_skiplist;
struct oidset gitmodules_found;
@@ -173,6 +181,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 4/9] refs: set up ref consistency check infrastructure
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (2 preceding siblings ...)
2024-07-03 13:57 ` [GSoC][PATCH v7 3/9] fsck: add refs-related options and error report function shejialuo
@ 2024-07-03 13:57 ` shejialuo
2024-07-03 13:58 ` [GSoC][PATCH v7 5/9] builtin/refs: add verify subcommand shejialuo
` (15 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:57 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
The interfaces defined in `ref_storage_be` are carefully grouped by
their semantics. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface of type
"fsck_fn", named "fsck", at the end of "ref_storage_be". It does not
belong to any of the five categories above, so explicitly add a blank
line to set it apart from the others.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..d89eeda8ef 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3445,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 5/9] builtin/refs: add verify subcommand
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (3 preceding siblings ...)
2024-07-03 13:57 ` [GSoC][PATCH v7 4/9] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-03 13:58 ` shejialuo
2024-07-03 13:58 ` [GSoC][PATCH v7 6/9] builtin/fsck: add `git-refs verify` child process shejialuo
` (14 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:58 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter checking: every WARN severity for the `Fsck Messages`
+ is treated as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..74720f5e0d 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret = 0;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose_refs = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "oid_skiplist"
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.oid_skiplist);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 6/9] builtin/fsck: add `git-refs verify` child process
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (4 preceding siblings ...)
2024-07-03 13:58 ` [GSoC][PATCH v7 5/9] builtin/refs: add verify subcommand shejialuo
@ 2024-07-03 13:58 ` shejialuo
2024-07-03 13:58 ` [GSoC][PATCH v7 7/9] files-backend: add unified interface for refs scanning shejialuo
` (13 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:58 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index de34538c4f..ec3357722c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -897,6 +897,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1066,6 +1081,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 7/9] files-backend: add unified interface for refs scanning
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (5 preceding siblings ...)
2024-07-03 13:58 ` [GSoC][PATCH v7 6/9] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-03 13:58 ` shejialuo
2024-07-03 13:59 ` [GSoC][PATCH v7 8/9] fsck: add ref name check for files backend shejialuo
` (12 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:58 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link, and both scans follow the same
pattern. Introduce a unified interface for scanning directories in the
files backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d89eeda8ef..84acb58782 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,6 +3409,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * Refs and reflogs share a unified interface for scanning a whole
+ * directory. A function of this type is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3415,7 +3488,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 8/9] fsck: add ref name check for files backend
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (6 preceding siblings ...)
2024-07-03 13:58 ` [GSoC][PATCH v7 7/9] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-03 13:59 ` shejialuo
2024-07-03 13:59 ` [GSoC][PATCH v7 9/9] fsck: add ref content " shejialuo
` (11 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:59 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
git-fsck(1) only checks references implicitly; it does not fully check
refs with badly formatted names, such as a standalone "@" or a name ending
with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new unit test to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index 246055c0f9..90457d1a1f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 84acb58782..69a76048d3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3489,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v7 9/9] fsck: add ref content check for files backend
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (7 preceding siblings ...)
2024-07-03 13:59 ` [GSoC][PATCH v7 8/9] fsck: add ref name check for files backend shejialuo
@ 2024-07-03 13:59 ` shejialuo
2024-07-08 13:32 ` [GSoC][PATCH v8 0/9] ref consistency check infra setup shejialuo
` (10 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-03 13:59 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert a
relative path to an absolute path. Then use "skip_prefix" to strip the
gitdir prefix so that the result aligns with the symref "referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path has the wrong file type in the filesystem.
Finally, add the following fsck messages:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index 90457d1a1f..637f596930 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 69a76048d3..d98ef45403 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For a symbolic ref,
+ * "pointee_name" would be the content after "ref:". For a symbolic link,
+ * "pointee_name" would be the path relative to "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs could have a trailing garbage. Should
+ * be reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 0/9] ref consistency check infra setup
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (8 preceding siblings ...)
2024-07-03 13:59 ` [GSoC][PATCH v7 9/9] fsck: add ref content " shejialuo
@ 2024-07-08 13:32 ` shejialuo
2024-07-08 13:34 ` [GSoC][PATCH v8 1/9] fsck: rename "skiplist" to "oid_skiplist" shejialuo
` (9 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:32 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version improves the commit messages of patch 1 and patch 2
according to Justin's review.
Thanks,
Jialuo
shejialuo (9):
fsck: rename "skiplist" to "oid_skiplist"
fsck: add a unified interface for reporting fsck messages
fsck: add refs-related options and error report function
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 32 ++++-
builtin/mktag.c | 1 +
builtin/refs.c | 44 ++++++
fsck.c | 107 +++++++++++---
fsck.h | 63 ++++++---
object-file.c | 11 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 255 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
16 files changed, 745 insertions(+), 57 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v7:
1: f5fcf36132 ! 1: 61e475840f fsck: rename "skiplist" to "oid_skiplist"
@@ Metadata
## Commit message ##
fsck: rename "skiplist" to "oid_skiplist"
- Because we introduce ref consistency check. The original "skiplist" is a
- common option which is set up during handling user configs. To avoid
- causing ambiguity, rename "skiplist" to "oid_skiplist".
+ The "skiplist" field in "fsck_options" is related to objects. Because we
+ are going to introduce ref consistency check, the "skiplist" name is too
+ general which will make the caller think "skiplist" is related to both
+ the refs and objects.
+
+ It may seem that for both refs and objects, we should provide a general
+ "skiplist" here. However, the type for "skiplist" is `struct oidset`
+ which is totally unsuitable for refs.
+
+ To avoid above ambiguity, rename "skiplist" to "oid_skiplist".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
2: c81b5b6fba ! 2: f2576d88a9 fsck: add a unified interface for reporting fsck messages
@@ Metadata
## Commit message ##
fsck: add a unified interface for reporting fsck messages
- The static function "report" provided by "fsck.c" aims at reporting the
- problems related to object database which cannot be reused for refs.
- In order to provide a unified interface which can report either objects
- or refs, create a new function "vfsck_report" by adding
- "checked_ref_name" parameter following the "report" prototype. However,
- instead of using "...", provide "va_list" to allow more flexibility.
+ The static function "report" provided by "fsck.c" aims at checking fsck
+ error type and calling the callback "error_func" to report the message.
+ However, "report" function is only related to object database which
+ cannot be reused for refs. In order to provide a unified interface which
+ can report either objects or refs, create a new function "vfsck_report"
+ by adding "checked_ref_name" parameter following the "report" prototype.
+ Instead of using "...", provide "va_list" to allow more flexibility.
- The "vfsck_report" function will use "error_func" registered in
- "fsck_options" function to report customized messages. Change
- "error_func" prototype to align with the "vfsck_report".
+ Like "report", the "vfsck_report" function will use "error_func"
+ registered in "fsck_options" to report customized messages. Change
+ "error_func" prototype to align with the new "vfsck_report".
- Change "report" function to make it use "vfsck_report" to report
- objects-related messages. Add a new function called "fsck_refs_report"
- to use "vfsck_report" to report refs-related messages.
+ Then, change "report" function to use "vfsck_report" to report objects
+ related messages. Add a new function called "fsck_refs_report" to use
+ "vfsck_report" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
3: 53156dc847 = 3: c3c2dda50c fsck: add refs-related options and error report function
4: 358f4a1be9 = 4: e826dc17ec refs: set up ref consistency check infrastructure
5: e632859df6 = 5: 33cac4882b builtin/refs: add verify subcommand
6: 5d7a6261ae = 6: 32668e3543 builtin/fsck: add `git-refs verify` child process
7: 33311af1b1 = 7: df83b2a990 files-backend: add unified interface for refs scanning
8: 02bd3ac952 = 8: c696c15651 fsck: add ref name check for files backend
9: 94d123471d = 9: 8b0f3aeb9c fsck: add ref content check for files backend
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 1/9] fsck: rename "skiplist" to "oid_skiplist"
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (9 preceding siblings ...)
2024-07-08 13:32 ` [GSoC][PATCH v8 0/9] ref consistency check infra setup shejialuo
@ 2024-07-08 13:34 ` shejialuo
2024-07-08 13:35 ` [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
` (8 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:34 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency check, the "skiplist" name is too
general which will make the caller think "skiplist" is related to both
the refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid above ambiguity, rename "skiplist" to "oid_skiplist".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..1960bfeba9 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->oid_skiplist, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..1ee3dd85ba 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset oid_skiplist;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (10 preceding siblings ...)
2024-07-08 13:34 ` [GSoC][PATCH v8 1/9] fsck: rename "skiplist" to "oid_skiplist" shejialuo
@ 2024-07-08 13:35 ` shejialuo
2024-07-08 14:36 ` Karthik Nayak
2024-07-08 13:35 ` [GSoC][PATCH v8 3/9] fsck: add refs-related options and error report function shejialuo
` (7 subsequent siblings)
19 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking fsck
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
can report either objects or refs, create a new function "vfsck_report"
by adding "checked_ref_name" parameter following the "report" prototype.
Instead of using "...", provide "va_list" to allow more flexibility.
Like "report", the "vfsck_report" function will use "error_func"
registered in "fsck_options" to report customized messages. Change
"error_func" prototype to align with the new "vfsck_report".
Then, change "report" function to use "vfsck_report" to report objects
related messages. Add a new function called "fsck_refs_report" to use
"vfsck_report" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 15 ++++-----
builtin/mktag.c | 1 +
fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
fsck.h | 42 ++++++++++++++++---------
object-file.c | 11 ++++---
5 files changed, 102 insertions(+), 48 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..de34538c4f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..42f945c584 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 1960bfeba9..7182ce8e80 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int vfsck_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, oid, object_type, checked_ref_name,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -260,6 +266,36 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, object_type, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
+
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, OBJ_NONE,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1200,12 +1236,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1303,16 +1340,18 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 1ee3dd85ba..f703dfb5e8 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,22 +114,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -145,12 +150,12 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .oid_skiplist = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -158,7 +163,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -166,7 +171,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
@@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 065103be3e..d2c6427935 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *ref_checked_name UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-08 13:35 ` [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-08 14:36 ` Karthik Nayak
2024-07-08 15:01 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Karthik Nayak @ 2024-07-08 14:36 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 12209 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> The static function "report" provided by "fsck.c" aims at checking fsck
> error type and calling the callback "error_func" to report the message.
> However, "report" function is only related to object database which
> cannot be reused for refs. In order to provide a unified interface which
> can report either objects or refs, create a new function "vfsck_report"
> by adding "checked_ref_name" parameter following the "report" prototype.
> Instead of using "...", provide "va_list" to allow more flexibility.
>
> Like "report", the "vfsck_report" function will use "error_func"
> registered in "fsck_options" to report customized messages. Change
> "error_func" prototype to align with the new "vfsck_report".
>
> Then, change "report" function to use "vfsck_report" to report objects
> related messages. Add a new function called "fsck_refs_report" to use
> "vfsck_report" to report refs related messages.
>
Not sure I really understand why we need to do this. Why can't we simply
add `const char *checked_ref_name` to the existing 'report' and
propagate this also to 'error_func'. Why do we need all these parallel
flows?
Apart from that, what does 'v' in 'vfsck_report' signify?
Perhaps it is also because this commit is doing a lot of things and we
could have simplified it into smaller commits?
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> builtin/fsck.c | 15 ++++-----
> builtin/mktag.c | 1 +
> fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
> fsck.h | 42 ++++++++++++++++---------
> object-file.c | 11 ++++---
> 5 files changed, 102 insertions(+), 48 deletions(-)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index d13a226c2e..de34538c4f 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
> return -1;
> }
>
> -static int fsck_error_func(struct fsck_options *o UNUSED,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> +static int fsck_objects_error_func(struct fsck_options *o UNUSED,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name UNUSED,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> switch (msg_type) {
> case FSCK_WARN:
> @@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
>
> fsck_walk_options.walk = mark_object;
> fsck_obj_options.walk = mark_used;
> - fsck_obj_options.error_func = fsck_error_func;
> + fsck_obj_options.error_func = fsck_objects_error_func;
> if (check_strict)
> fsck_obj_options.strict = 1;
>
> diff --git a/builtin/mktag.c b/builtin/mktag.c
> index 4767f1a97e..42f945c584 100644
> --- a/builtin/mktag.c
> +++ b/builtin/mktag.c
> @@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
> static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
> const struct object_id *oid UNUSED,
> enum object_type object_type UNUSED,
> + const char *checked_ref_name UNUSED,
> enum fsck_msg_type msg_type,
> enum fsck_msg_id msg_id UNUSED,
> const char *message)
> diff --git a/fsck.c b/fsck.c
> index 1960bfeba9..7182ce8e80 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
> return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
> }
>
> -__attribute__((format (printf, 5, 6)))
> -static int report(struct fsck_options *options,
> - const struct object_id *oid, enum object_type object_type,
> - enum fsck_msg_id msg_id, const char *fmt, ...)
> +/*
> + * Provide a unified interface for either fscking refs or objects.
> + * It will get the current msg error type and call the error_func callback
> + * which is registered in the "fsck_options" struct.
> + */
> +static int vfsck_report(struct fsck_options *options,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id, const char *fmt, va_list ap)
> {
> - va_list ap;
> + va_list ap_copy;
> struct strbuf sb = STRBUF_INIT;
> enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
> int result;
> @@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
> prepare_msg_ids();
> strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
>
> - va_start(ap, fmt);
> - strbuf_vaddf(&sb, fmt, ap);
> - result = options->error_func(options, oid, object_type,
> + va_copy(ap_copy, ap);
> + strbuf_vaddf(&sb, fmt, ap_copy);
> + result = options->error_func(options, oid, object_type, checked_ref_name,
> msg_type, msg_id, sb.buf);
> strbuf_release(&sb);
> va_end(ap);
> @@ -260,6 +266,36 @@ static int report(struct fsck_options *options,
> return result;
> }
>
> +__attribute__((format (printf, 5, 6)))
>
Shouldn't this be moved to the header file too?
> +static int report(struct fsck_options *options,
> + const struct object_id *oid, enum object_type object_type,
> + enum fsck_msg_id msg_id, const char *fmt, ...)
> +{
> + va_list ap;
> + int result;
> + va_start(ap, fmt);
> + result = vfsck_report(options, oid, object_type, NULL,
> + msg_id, fmt, ap);
> + va_end(ap);
> + return result;
> +}
> +
> +
> +
There is an extra newline here.
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id, const char *fmt, ...)
> +{
> + va_list ap;
> + int result;
> + va_start(ap, fmt);
> + result = vfsck_report(options, oid, OBJ_NONE,
> + checked_ref_name, msg_id, fmt, ap);
> + va_end(ap);
> + return result;
> +}
> +
> void fsck_enable_object_names(struct fsck_options *options)
> {
> if (!options->object_names)
> @@ -1200,12 +1236,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
> type);
> }
>
> -int fsck_error_function(struct fsck_options *o,
> - const struct object_id *oid,
> - enum object_type object_type UNUSED,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> +int fsck_objects_error_function(struct fsck_options *o,
> + const struct object_id *oid,
> + enum object_type object_type UNUSED,
> + const char *checked_ref_name UNUSED,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> if (msg_type == FSCK_WARN) {
> warning("object %s: %s", fsck_describe_object(o, oid), message);
> @@ -1303,16 +1340,18 @@ int git_fsck_config(const char *var, const char *value,
> * Custom error callbacks that are used in more than one place.
> */
>
> -int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id,
> - const char *message)
> +int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id,
> + const char *message)
> {
> if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
> puts(oid_to_hex(oid));
> return 0;
> }
> - return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
> + return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
> + msg_type, msg_id, message);
> }
> diff --git a/fsck.h b/fsck.h
> index 1ee3dd85ba..f703dfb5e8 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -114,22 +114,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
> typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
> void *data, struct fsck_options *options);
>
> -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> +/*
> + * callback function for reporting errors when checking either objects or refs
> + */
> typedef int (*fsck_error)(struct fsck_options *o,
> const struct object_id *oid, enum object_type object_type,
> + const char *checked_ref_name,
> enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> const char *message);
>
> -int fsck_error_function(struct fsck_options *o,
> - const struct object_id *oid, enum object_type object_type,
> - enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> - const char *message);
> -int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id,
> - const char *message);
> +int fsck_objects_error_function(struct fsck_options *o,
> + const struct object_id *oid, enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> + const char *message);
> +int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id,
> + const char *message);
>
> struct fsck_options {
> fsck_walk_func walk;
> @@ -145,12 +150,12 @@ struct fsck_options {
> };
>
> #define FSCK_OPTIONS_DEFAULT { \
> - .skiplist = OIDSET_INIT, \
> + .oid_skiplist = OIDSET_INIT, \
> .gitmodules_found = OIDSET_INIT, \
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_function \
> + .error_func = fsck_objects_error_function \
> }
> #define FSCK_OPTIONS_STRICT { \
> .strict = 1, \
> @@ -158,7 +163,7 @@ struct fsck_options {
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_function, \
> + .error_func = fsck_objects_error_function, \
> }
> #define FSCK_OPTIONS_MISSING_GITMODULES { \
> .strict = 1, \
> @@ -166,7 +171,7 @@ struct fsck_options {
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_cb_print_missing_gitmodules, \
> + .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> }
>
> /* descend in all linked child objects
> @@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
> */
> int fsck_finish(struct fsck_options *options);
>
> +__attribute__((format (printf, 5, 6)))
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id,
> + const char *fmt, ...);
> +
> /*
> * Subsystem for storing human-readable names for each object.
> *
> diff --git a/object-file.c b/object-file.c
> index 065103be3e..d2c6427935 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
> * give more context.
> */
> static int hash_format_check_report(struct fsck_options *opts UNUSED,
> - const struct object_id *oid UNUSED,
> - enum object_type object_type UNUSED,
> - enum fsck_msg_type msg_type UNUSED,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> + const struct object_id *oid UNUSED,
> + enum object_type object_type UNUSED,
> + const char *ref_checked_name UNUSED,
> + enum fsck_msg_type msg_type UNUSED,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> error(_("object fails fsck: %s"), message);
> return 1;
> --
> 2.45.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-08 14:36 ` Karthik Nayak
@ 2024-07-08 15:01 ` shejialuo
2024-07-08 17:11 ` Karthik Nayak
0 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-08 15:01 UTC (permalink / raw)
To: Karthik Nayak
Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine,
Justin Tobler
On Mon, Jul 08, 2024 at 10:36:38AM -0400, Karthik Nayak wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > The static function "report" provided by "fsck.c" aims at checking fsck
> > error type and calling the callback "error_func" to report the message.
> > However, "report" function is only related to object database which
> > cannot be reused for refs. In order to provide a unified interface which
> > can report either objects or refs, create a new function "vfsck_report"
> > by adding "checked_ref_name" parameter following the "report" prototype.
> > Instead of using "...", provide "va_list" to allow more flexibility.
> >
> > Like "report", the "vfsck_report" function will use "error_func"
> > registered in "fsck_options" to report customized messages. Change
> > "error_func" prototype to align with the new "vfsck_report".
> >
> > Then, change "report" function to use "vfsck_report" to report objects
> > related messages. Add a new function called "fsck_refs_report" to use
> > "vfsck_report" to report refs related messages.
> >
>
> Not sure I really understand why we need to do this. Why can't we simply
> add `const char *checked_ref_name` to the existing 'report' and
> propagate this also to 'error_func'. Why do we need all this parallel
> flows?
>
Yes, we could just add a parameter "const char *checked_ref_name" to the
existing "report". This may seem the simplest way to do it. However, it
would also introduce the following problems:
1. The "report" function would have to be exported, so we would need to
rename it to "fsck_report", which means changing a lot of code. And we
MUST rename it, because "report" is too generic a name to export.
2. When we add a new parameter in "report", for all the "report" calls,
we need to pass this new parameter with NULL.
With this approach, we do not need to change the "report" function
prototype or the corresponding calls. Most importantly, the change is
transparent to callers: those using "report" can simply ignore
"checked_ref_name". Likewise, "fsck_refs_report" can omit the parameters
it does not need.
So I think this design is more elegant than just adding a new parameter
in the existing "report" function.
> Apart from that, what does 'v' in 'vfsck_report' signify?
>
Because the function takes a "va_list" parameter, I want to emphasize
that with the "v" prefix. It also leaves room to add a "fsck_report"
function later. Many functions in the Git code base follow this naming
convention; I just followed it.
> Perhaps it is also because this commit is doing a lot of things and we
> could have simplified it into smaller commits?
>
Actually, this commit is very clear. I just want to provide a unified
function "vfsck_report" here, and have both "report" and
"fsck_refs_report" use it.
So I don't know whether we should split this commit into multiple
commits. They are just tied together.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-08 15:01 ` shejialuo
@ 2024-07-08 17:11 ` Karthik Nayak
0 siblings, 0 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-07-08 17:11 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine,
Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 3291 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> On Mon, Jul 08, 2024 at 10:36:38AM -0400, Karthik Nayak wrote:
>> shejialuo <shejialuo@gmail.com> writes:
>>
>> > The static function "report" provided by "fsck.c" aims at checking fsck
>> > error type and calling the callback "error_func" to report the message.
>> > However, "report" function is only related to object database which
>> > cannot be reused for refs. In order to provide a unified interface which
>> > can report either objects or refs, create a new function "vfsck_report"
>> > by adding "checked_ref_name" parameter following the "report" prototype.
>> > Instead of using "...", provide "va_list" to allow more flexibility.
>> >
>> > Like "report", the "vfsck_report" function will use "error_func"
>> > registered in "fsck_options" to report customized messages. Change
>> > "error_func" prototype to align with the new "vfsck_report".
>> >
>> > Then, change "report" function to use "vfsck_report" to report objects
>> > related messages. Add a new function called "fsck_refs_report" to use
>> > "vfsck_report" to report refs related messages.
>> >
>>
>> Not sure I really understand why we need to do this. Why can't we simply
>> add `const char *checked_ref_name` to the existing 'report' and
>> propagate this also to 'error_func'. Why do we need all this parallel
>> flows?
>>
>
> Yes, we could just add a parameter "const char *checked_ref_name" to the
> existing "report". This may seem the simplest way to do. However, it
> will also introduce some trouble below:
>
> 1. "report" function should be exported to the outside, we need to
> rename it to "fsck_report". Well, we need to change a lot of code here.
> And we MUST do this, because "report" is a general name. When exporting
> to the outside, it's not proper.
>
agreed.
> 2. When we add a new parameter in "report", for all the "report" calls,
> we need to pass this new parameter with NULL.
>
agreed too.
> Use this way, we could do not change "report" function prototype and the
> corresponding calls. Most importantly, we could let the caller feel
> transparent. Using "report", caller can just ignore "checked_ref_name".
> Also for "fsck_refs_report", we could ignore some UNUSED parameters.
>
> So I think this design is more elegant than just adding a new parameter
> in the existing "report" function.
>
I understand what you're saying, I also checked and can see that there
are 60 references to the `report()` function. So perhaps there is some
merit in keeping it as is and adding a new 'report_refs()'.
>> Apart from that, what does 'v' in 'vfsck_report' signify?
>>
>
> Because I use "va_list" parameter, I want to emphasis on this. And this
> provides flexibility that we could add a "fsck_report" function later.
> There are many codes in git code base using this way. I just followed
> this.
>
I see. Makes sense.
>> Perhaps it is also because this commit is doing a lot of things and we
>> could have simplified it into smaller commits?
>>
>
> Actually, this commit is very clear. I just want to provide a unified
> function "vfsck_report" here. And let the "report" use this function and
> "fsck_refs_report" function use this.
>
> So I don't know whether we should split this commit into multiple
> commits. They are just tied together.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 3/9] fsck: add refs-related options and error report function
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (11 preceding siblings ...)
2024-07-08 13:35 ` [GSoC][PATCH v8 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-08 13:35 ` shejialuo
2024-07-08 14:49 ` Karthik Nayak
2024-07-08 13:35 ` [GSoC][PATCH v8 4/9] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
19 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Add refs-related options to the "fsck_options", create refs-specific
"error_func" callback "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.
Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 22 ++++++++++++++++++++++
fsck.h | 15 +++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/fsck.c b/fsck.c
index 7182ce8e80..d1dcbdcac2 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1252,6 +1252,28 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ static struct strbuf sb = STRBUF_INIT;
+
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN) {
+ warning("%s: %s", sb.buf, message);
+ return 0;
+ }
+ error("%s: %s", sb.buf, message);
+ return 1;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index f703dfb5e8..246055c0f9 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose_refs:1;
enum fsck_msg_type *msg_type;
struct oidset oid_skiplist;
struct oidset gitmodules_found;
@@ -173,6 +181,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v8 3/9] fsck: add refs-related options and error report function
2024-07-08 13:35 ` [GSoC][PATCH v8 3/9] fsck: add refs-related options and error report function shejialuo
@ 2024-07-08 14:49 ` Karthik Nayak
2024-07-08 15:32 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Karthik Nayak @ 2024-07-08 14:49 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 4587 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> Add refs-related options to the "fsck_options", create refs-specific
> "error_func" callback "fsck_refs_error_function".
>
> "fsck_refs_error_function" will use the "oid" parameter. When the caller
> passes the oid, it will use "oid_to_hex" to get the corresponding hex
> value to report to the caller.
>
> Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
> macros to create refs options easily.
>
Carrying over from the previous commit, couldn't we simply do something
like:
diff --git a/fsck.c b/fsck.c
index eea7145470..32ae36a4fc 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1202,17 +1203,33 @@ int fsck_buffer(const struct object_id
*oid, enum object_type type,
int fsck_error_function(struct fsck_options *o,
const struct object_id *oid,
- enum object_type object_type UNUSED,
+ enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
+ static struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (checked_ref_name) {
+ strbuf_addstr("ref %s", checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+ } else {
+ strbuf_addstr("object %s", fsck_describe_object(o, oid));
+ }
+
if (msg_type == FSCK_WARN) {
- warning("object %s: %s", fsck_describe_object(o, oid), message);
- return 0;
+ warning("%s: %s", sb.buf, message);
+ ret = 0;
+ goto cleanup;
}
- error("object %s: %s", fsck_describe_object(o, oid), message);
- return 1;
+ error("%s: %s", sb.buf, message);
+
+cleanup:
+ strbuf_release(&sb);
+ return ret;
}
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> fsck.c | 22 ++++++++++++++++++++++
> fsck.h | 15 +++++++++++++++
> 2 files changed, 37 insertions(+)
>
> diff --git a/fsck.c b/fsck.c
> index 7182ce8e80..d1dcbdcac2 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -1252,6 +1252,28 @@ int fsck_objects_error_function(struct fsck_options *o,
> return 1;
> }
>
> +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> + const struct object_id *oid,
> + enum object_type object_type UNUSED,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> +{
> + static struct strbuf sb = STRBUF_INIT;
> +
> + strbuf_addstr(&sb, checked_ref_name);
> + if (oid)
> + strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
> +
> + if (msg_type == FSCK_WARN) {
> + warning("%s: %s", sb.buf, message);
> + return 0;
> + }
> + error("%s: %s", sb.buf, message);
> + return 1;
> +}
> +
We don't free strbuf here.
> static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
> enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
> struct fsck_options *options, const char *blob_type)
> diff --git a/fsck.h b/fsck.h
> index f703dfb5e8..246055c0f9 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> enum fsck_msg_type msg_type,
> enum fsck_msg_id msg_id,
> const char *message);
> +int fsck_refs_error_function(struct fsck_options *options,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id,
> + const char *message);
>
> struct fsck_options {
> fsck_walk_func walk;
> fsck_error error_func;
> unsigned strict:1;
> + unsigned verbose_refs:1;
Nit: Here we have the subject 'refs' towards the end of the name.
> enum fsck_msg_type *msg_type;
> struct oidset oid_skiplist;
but here we have the subject 'oid' at the start of the name. Perhaps we
can rename this to 'skip_oids'?
> struct oidset gitmodules_found;
> @@ -173,6 +181,13 @@ struct fsck_options {
> .gitattributes_done = OIDSET_INIT, \
> .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> }
> +#define FSCK_REFS_OPTIONS_DEFAULT { \
> + .error_func = fsck_refs_error_function, \
> +}
> +#define FSCK_REFS_OPTIONS_STRICT { \
> + .strict = 1, \
> + .error_func = fsck_refs_error_function, \
> +}
>
> /* descend in all linked child objects
> * the return value is:
> --
> 2.45.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v8 3/9] fsck: add refs-related options and error report function
2024-07-08 14:49 ` Karthik Nayak
@ 2024-07-08 15:32 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 15:32 UTC (permalink / raw)
To: Karthik Nayak
Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine,
Justin Tobler
On Mon, Jul 08, 2024 at 07:49:43AM -0700, Karthik Nayak wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > Add refs-related options to the "fsck_options", create refs-specific
> > "error_func" callback "fsck_refs_error_function".
> >
> > "fsck_refs_error_function" will use the "oid" parameter. When the caller
> > passes the oid, it will use "oid_to_hex" to get the corresponding hex
> > value to report to the caller.
> >
> > Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
> > macros to create refs options easily.
> >
>
> Carrying over from the previous commit, couldn't we simply do something
> like:
>
> diff --git a/fsck.c b/fsck.c
> index eea7145470..32ae36a4fc 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -1202,17 +1203,33 @@ int fsck_buffer(const struct object_id
> *oid, enum object_type type,
>
> int fsck_error_function(struct fsck_options *o,
> const struct object_id *oid,
> - enum object_type object_type UNUSED,
> + enum object_type object_type,
> + const char *checked_ref_name,
> enum fsck_msg_type msg_type,
> enum fsck_msg_id msg_id UNUSED,
> const char *message)
> {
> + static struct strbuf sb = STRBUF_INIT;
> + int ret = 0;
> +
> + if (checked_ref_name) {
> + strbuf_addstr("ref %s", checked_ref_name);
> + if (oid)
> + strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
> + } else {
> + strbuf_addstr("object %s", fsck_describe_object(o, oid));
> + }
> +
> if (msg_type == FSCK_WARN) {
> - warning("object %s: %s", fsck_describe_object(o, oid), message);
> - return 0;
> + warning("%s: %s", sb.buf, message);
> + ret = 0;
> + goto cleanup;
> }
> - error("object %s: %s", fsck_describe_object(o, oid), message);
> - return 1;
> + error("%s: %s", sb.buf, message);
> +
> +cleanup:
> + strbuf_release(&sb);
> + return ret;
> }
>
>
However, "fsck_error_function" will be used as the callback for
object-related checks. I don't think we should use one function to
incorporate all the situations.
For example, if we pass both "checked_ref_name" and "oid" here, how does
this one function know whether we are handling refs or objects? Although
we could use some flags here to distinguish the situations, that is a bad
idea from my perspective. Adding a new callback here will be cleaner.
> > Mentored-by: Patrick Steinhardt <ps@pks.im>
> > Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> > Signed-off-by: shejialuo <shejialuo@gmail.com>
> > ---
> > fsck.c | 22 ++++++++++++++++++++++
> > fsck.h | 15 +++++++++++++++
> > 2 files changed, 37 insertions(+)
> >
> > diff --git a/fsck.c b/fsck.c
> > index 7182ce8e80..d1dcbdcac2 100644
> > --- a/fsck.c
> > +++ b/fsck.c
> > @@ -1252,6 +1252,28 @@ int fsck_objects_error_function(struct fsck_options *o,
> > return 1;
> > }
> >
> > +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> > + const struct object_id *oid,
> > + enum object_type object_type UNUSED,
> > + const char *checked_ref_name,
> > + enum fsck_msg_type msg_type,
> > + enum fsck_msg_id msg_id UNUSED,
> > + const char *message)
> > +{
> > + static struct strbuf sb = STRBUF_INIT;
> > +
> > + strbuf_addstr(&sb, checked_ref_name);
> > + if (oid)
> > + strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
> > +
> > + if (msg_type == FSCK_WARN) {
> > + warning("%s: %s", sb.buf, message);
> > + return 0;
> > + }
> > + error("%s: %s", sb.buf, message);
> > + return 1;
> > +}
> > +
>
> We don't free strbuf here.
>
Yes, I made a mistake here. I should not define the "sb" static, will
fix it in the next version.
> > static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
> > enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
> > struct fsck_options *options, const char *blob_type)
> > diff --git a/fsck.h b/fsck.h
> > index f703dfb5e8..246055c0f9 100644
> > --- a/fsck.h
> > +++ b/fsck.h
> > @@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> > enum fsck_msg_type msg_type,
> > enum fsck_msg_id msg_id,
> > const char *message);
> > +int fsck_refs_error_function(struct fsck_options *options,
> > + const struct object_id *oid,
> > + enum object_type object_type,
> > + const char *checked_ref_name,
> > + enum fsck_msg_type msg_type,
> > + enum fsck_msg_id msg_id,
> > + const char *message);
> >
> > struct fsck_options {
> > fsck_walk_func walk;
> > fsck_error error_func;
> > unsigned strict:1;
> > + unsigned verbose_refs:1;
>
> Nit: Here we have the subject 'refs' towards the end of the name.
>
> > enum fsck_msg_type *msg_type;
> > struct oidset oid_skiplist;
>
> but here we have the subject 'oid' at the start of the name. Perhaps we
> can rename this to 'skip_oids'?
>
I agree. "skip_oids" will be more meaningful. I will fix it in the next
version.
> > struct oidset gitmodules_found;
> > @@ -173,6 +181,13 @@ struct fsck_options {
> > .gitattributes_done = OIDSET_INIT, \
> > .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> > }
> > +#define FSCK_REFS_OPTIONS_DEFAULT { \
> > + .error_func = fsck_refs_error_function, \
> > +}
> > +#define FSCK_REFS_OPTIONS_STRICT { \
> > + .strict = 1, \
> > + .error_func = fsck_refs_error_function, \
> > +}
> >
> > /* descend in all linked child objects
> > * the return value is:
> > --
> > 2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 4/9] refs: set up ref consistency check infrastructure
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (12 preceding siblings ...)
2024-07-08 13:35 ` [GSoC][PATCH v8 3/9] fsck: add refs-related options and error report function shejialuo
@ 2024-07-08 13:35 ` shejialuo
2024-07-08 13:36 ` [GSoC][PATCH v8 5/9] builtin/refs: add verify subcommand shejialuo
` (5 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The interfaces defined in `ref_storage_be` are carefully structured
semantically. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named "fsck" (of
type "fsck_fn") to the end of "ref_storage_be". This interface does not
fit into any of the five categories above, so explicitly add a blank line
to set it apart from the others.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..d89eeda8ef 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3445,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 5/9] builtin/refs: add verify subcommand
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (13 preceding siblings ...)
2024-07-08 13:35 ` [GSoC][PATCH v8 4/9] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-08 13:36 ` shejialuo
2024-07-08 13:36 ` [GSoC][PATCH v8 6/9] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter checking: every WARN severity for the `Fsck Messages`
+ is treated as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..74720f5e0d 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret = 0;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose_refs = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "oid_skiplist"
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.oid_skiplist);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 6/9] builtin/fsck: add `git-refs verify` child process
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (14 preceding siblings ...)
2024-07-08 13:36 ` [GSoC][PATCH v8 5/9] builtin/refs: add verify subcommand shejialuo
@ 2024-07-08 13:36 ` shejialuo
2024-07-08 13:36 ` [GSoC][PATCH v8 7/9] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index de34538c4f..ec3357722c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -897,6 +897,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1066,6 +1081,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 7/9] files-backend: add unified interface for refs scanning
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (15 preceding siblings ...)
2024-07-08 13:36 ` [GSoC][PATCH v8 6/9] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-08 13:36 ` shejialuo
2024-07-08 13:36 ` [GSoC][PATCH v8 8/9] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link that shares the same pattern.
Introduce a unified interface for scanning directories in the
files backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d89eeda8ef..84acb58782 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,6 +3409,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3415,7 +3488,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 8/9] fsck: add ref name check for files backend
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (16 preceding siblings ...)
2024-07-08 13:36 ` [GSoC][PATCH v8 7/9] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-08 13:36 ` shejialuo
2024-07-08 13:37 ` [GSoC][PATCH v8 9/9] fsck: add ref content " shejialuo
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully check
refs with badly formatted names, such as a standalone "@" or a name ending
with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new unit test to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index 246055c0f9..90457d1a1f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 84acb58782..69a76048d3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3489,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v8 9/9] fsck: add ref content check for files backend
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (17 preceding siblings ...)
2024-07-08 13:36 ` [GSoC][PATCH v8 8/9] fsck: add ref name check for files backend shejialuo
@ 2024-07-08 13:37 ` shejialuo
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
19 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-08 13:37 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert a
relative path to an absolute path. Then use "skip_prefix" to strip the
gitdir prefix so that the result aligns with the symref "referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path is a wrong type in filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index 90457d1a1f..637f596930 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 69a76048d3..d98ef45403 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For a symbolic ref, "pointee_name"
+ * would be the content after "ref:". For a symbolic link, "pointee_name" would
+ * be the path relative to "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs could have a trailing garbage. Should
+ * be reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 0/9] ref consistency check infra setup
2024-07-03 13:53 ` [GSoC][PATCH v7 0/9] " shejialuo
` (18 preceding siblings ...)
2024-07-08 13:37 ` [GSoC][PATCH v8 9/9] fsck: add ref content " shejialuo
@ 2024-07-09 12:32 ` shejialuo
2024-07-09 12:34 ` [GSoC][PATCH v9 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
` (9 more replies)
19 siblings, 10 replies; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:32 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version changes the following things:
1. Instead of using "oid_skiplist", use "skip_oids".
2. "fsck_refs_error_function" defines a static "struct strbuf". It is
`static` because the function may be called many times, and allocating
memory on every call is a bad idea; for the same reason its memory is
never freed. I made a mistake here: I should call `strbuf_reset` every
time the function is entered, otherwise text from earlier calls stays in
the buffer. This version adds "strbuf_reset" to make the behavior correct.
shejialuo (9):
fsck: rename "skiplist" to "skip_oids"
fsck: add a unified interface for reporting fsck messages
fsck: add refs-related options and error report function
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 32 ++++-
builtin/mktag.c | 1 +
builtin/refs.c | 44 ++++++
fsck.c | 108 +++++++++++---
fsck.h | 63 ++++++---
object-file.c | 11 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 255 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
16 files changed, 746 insertions(+), 57 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v8:
1: 61e475840f ! 1: e044f933de fsck: rename "skiplist" to "oid_skiplist"
@@ Metadata
Author: shejialuo <shejialuo@gmail.com>
## Commit message ##
- fsck: rename "skiplist" to "oid_skiplist"
+ fsck: rename "skiplist" to "skip_oids"
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency check, the "skiplist" name is too
@@ Commit message
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
- To avoid above ambiguity, rename "skiplist" to "oid_skiplist".
+ To avoid above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
@@ fsck.c: void fsck_set_msg_types(struct fsck_options *options, const char *values
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
-+ oidset_parse_file(&options->oid_skiplist, buf + equal + 1,
++ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ fsck.c: void fsck_set_msg_types(struct fsck_options *options, const char *values
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
-+ return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
++ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
@@ fsck.h: struct fsck_options {
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
-+ struct oidset oid_skiplist;
++ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
+@@ fsck.h: struct fsck_options {
+ };
+
+ #define FSCK_OPTIONS_DEFAULT { \
+- .skiplist = OIDSET_INIT, \
++ .skip_oids = OIDSET_INIT, \
+ .gitmodules_found = OIDSET_INIT, \
+ .gitmodules_done = OIDSET_INIT, \
+ .gitattributes_found = OIDSET_INIT, \
2: f2576d88a9 ! 2: daaf3d0ffe fsck: add a unified interface for reporting fsck messages
@@ builtin/mktag.c: static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
## fsck.c ##
@@ fsck.c: static int object_on_skiplist(struct fsck_options *opts,
- return opts && oid && oidset_contains(&opts->oid_skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
@@ fsck.h: int is_valid_msg_type(const char *msg_id, const char *msg_type);
struct fsck_options {
fsck_walk_func walk;
@@ fsck.h: struct fsck_options {
- };
-
- #define FSCK_OPTIONS_DEFAULT { \
-- .skiplist = OIDSET_INIT, \
-+ .oid_skiplist = OIDSET_INIT, \
- .gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
3: c3c2dda50c ! 3: 40da85ae30 fsck: add refs-related options and error report function
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+{
+ static struct strbuf sb = STRBUF_INIT;
+
++ strbuf_reset(&sb);
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *
unsigned strict:1;
+ unsigned verbose_refs:1;
enum fsck_msg_type *msg_type;
- struct oidset oid_skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
@@ fsck.h: struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
4: e826dc17ec = 4: a38ea1b117 refs: set up ref consistency check infrastructure
5: 33cac4882b ! 5: 8320f56e0b builtin/refs: add verify subcommand
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ * Explicitly free the allocated array and "oid_skiplist"
+ */
+ free(fsck_refs_options.msg_type);
-+ oidset_clear(&fsck_refs_options.oid_skiplist);
++ oidset_clear(&fsck_refs_options.skip_oids);
+ return ret;
+}
+
6: 32668e3543 = 6: 6614a06ef5 builtin/fsck: add `git-refs verify` child process
7: df83b2a990 = 7: 928cc96396 files-backend: add unified interface for refs scanning
8: c696c15651 = 8: 4d50d4932f fsck: add ref name check for files backend
9: 8b0f3aeb9c = 9: 7edb810819 fsck: add ref content check for files backend
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 1/9] fsck: rename "skiplist" to "skip_oids"
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
@ 2024-07-09 12:34 ` shejialuo
2024-07-09 12:35 ` [GSoC][PATCH v9 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
` (8 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:34 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency check, the "skiplist" name is too
general which will make the caller think "skiplist" is related to both
the refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
2024-07-09 12:34 ` [GSoC][PATCH v9 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-07-09 12:35 ` shejialuo
2024-07-09 20:24 ` Justin Tobler
2024-07-09 12:35 ` [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function shejialuo
` (7 subsequent siblings)
9 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking fsck
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
can report either objects or refs, create a new function "vfsck_report"
by adding "checked_ref_name" parameter following the "report" prototype.
Instead of using "...", provide "va_list" to allow more flexibility.
Like "report", the "vfsck_report" function will use "error_func"
registered in "fsck_options" to report customized messages. Change
"error_func" prototype to align with the new "vfsck_report".
Then, change "report" function to use "vfsck_report" to report objects
related messages. Add a new function called "fsck_refs_report" to use
"vfsck_report" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 15 ++++-----
builtin/mktag.c | 1 +
fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
fsck.h | 40 +++++++++++++++---------
object-file.c | 11 ++++---
5 files changed, 101 insertions(+), 47 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..de34538c4f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..42f945c584 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 3f32441492..e1819964e3 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int vfsck_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, oid, object_type, checked_ref_name,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -260,6 +266,36 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, object_type, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
+
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, OBJ_NONE,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1200,12 +1236,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1303,16 +1340,18 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..8ce48395f6 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,22 +114,27 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -150,7 +155,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -158,7 +163,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -166,7 +171,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
@@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 065103be3e..d2c6427935 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *ref_checked_name UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-09 12:35 ` [GSoC][PATCH v9 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-09 20:24 ` Justin Tobler
2024-07-10 12:09 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Justin Tobler @ 2024-07-09 20:24 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On 24/07/09 08:35PM, shejialuo wrote:
> The static function "report" provided by "fsck.c" aims at checking fsck
> error type and calling the callback "error_func" to report the message.
> However, "report" function is only related to object database which
> cannot be reused for refs. In order to provide a unified interface which
> can report either objects or refs, create a new function "vfsck_report"
> by adding "checked_ref_name" parameter following the "report" prototype.
> Instead of using "...", provide "va_list" to allow more flexibility.
>
> Like "report", the "vfsck_report" function will use "error_func"
> registered in "fsck_options" to report customized messages. Change
> "error_func" prototype to align with the new "vfsck_report".
>
> Then, change "report" function to use "vfsck_report" to report objects
> related messages. Add a new function called "fsck_refs_report" to use
> "vfsck_report" to report refs related messages.
>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> builtin/fsck.c | 15 ++++-----
> builtin/mktag.c | 1 +
> fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
> fsck.h | 40 +++++++++++++++---------
> object-file.c | 11 ++++---
> 5 files changed, 101 insertions(+), 47 deletions(-)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index d13a226c2e..de34538c4f 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
> return -1;
> }
>
> -static int fsck_error_func(struct fsck_options *o UNUSED,
> - const struct object_id *oid,
> - enum object_type object_type,
> - enum fsck_msg_type msg_type,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> +static int fsck_objects_error_func(struct fsck_options *o UNUSED,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name UNUSED,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
This is just a suggestion, but I think it would be slightly easier to
review if the `*_error_func()` renames were done in a separate preceding
patch. That way the purpose of the renames can also be clearly
explained.
> {
> switch (msg_type) {
> case FSCK_WARN:
> @@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
>
> fsck_walk_options.walk = mark_object;
> fsck_obj_options.walk = mark_used;
> - fsck_obj_options.error_func = fsck_error_func;
> + fsck_obj_options.error_func = fsck_objects_error_func;
> if (check_strict)
> fsck_obj_options.strict = 1;
>
[snip]
> @@ -166,7 +171,7 @@ struct fsck_options {
> .gitmodules_done = OIDSET_INIT, \
> .gitattributes_found = OIDSET_INIT, \
> .gitattributes_done = OIDSET_INIT, \
> - .error_func = fsck_error_cb_print_missing_gitmodules, \
> + .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> }
>
> /* descend in all linked child objects
> @@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
> */
> int fsck_finish(struct fsck_options *options);
>
> +__attribute__((format (printf, 5, 6)))
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const char *checked_ref_name,
> + enum fsck_msg_id msg_id,
> + const char *fmt, ...);
> +
I think I mentioned this in a previous reply, but it was missed. Not a
big deal, but it might be nice to document `int fsck_refs_report()`
here.
-Justin
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 2/9] fsck: add a unified interface for reporting fsck messages
2024-07-09 20:24 ` Justin Tobler
@ 2024-07-10 12:09 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 12:09 UTC (permalink / raw)
To: Justin Tobler
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On Tue, Jul 09, 2024 at 03:24:50PM -0500, Justin Tobler wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > The static function "report" provided by "fsck.c" aims at checking fsck
> > error type and calling the callback "error_func" to report the message.
> > However, "report" function is only related to object database which
> > cannot be reused for refs. In order to provide a unified interface which
> > can report either objects or refs, create a new function "vfsck_report"
> > by adding "checked_ref_name" parameter following the "report" prototype.
> > Instead of using "...", provide "va_list" to allow more flexibility.
> >
> > Like "report", the "vfsck_report" function will use "error_func"
> > registered in "fsck_options" to report customized messages. Change
> > "error_func" prototype to align with the new "vfsck_report".
> >
> > Then, change "report" function to use "vfsck_report" to report objects
> > related messages. Add a new function called "fsck_refs_report" to use
> > "vfsck_report" to report refs related messages.
> >
> > Mentored-by: Patrick Steinhardt <ps@pks.im>
> > Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> > Signed-off-by: shejialuo <shejialuo@gmail.com>
> > ---
> > builtin/fsck.c | 15 ++++-----
> > builtin/mktag.c | 1 +
> > fsck.c | 81 ++++++++++++++++++++++++++++++++++++-------------
> > fsck.h | 40 +++++++++++++++---------
> > object-file.c | 11 ++++---
> > 5 files changed, 101 insertions(+), 47 deletions(-)
> >
> > diff --git a/builtin/fsck.c b/builtin/fsck.c
> > index d13a226c2e..de34538c4f 100644
> > --- a/builtin/fsck.c
> > +++ b/builtin/fsck.c
> > @@ -89,12 +89,13 @@ static int objerror(struct object *obj, const char *err)
> > return -1;
> > }
> >
> > -static int fsck_error_func(struct fsck_options *o UNUSED,
> > - const struct object_id *oid,
> > - enum object_type object_type,
> > - enum fsck_msg_type msg_type,
> > - enum fsck_msg_id msg_id UNUSED,
> > - const char *message)
> > +static int fsck_objects_error_func(struct fsck_options *o UNUSED,
> > + const struct object_id *oid,
> > + enum object_type object_type,
> > + const char *checked_ref_name UNUSED,
> > + enum fsck_msg_type msg_type,
> > + enum fsck_msg_id msg_id UNUSED,
> > + const char *message)
>
> This is just a suggestion, but I think it would be slightly easier to
> review if the `*_error_func()` renames were done in a separate preceding
> patch. That way the purpose of the renames can also be clearly
> explained.
>
I agree with this, will change in the next version.
> > {
> > switch (msg_type) {
> > case FSCK_WARN:
> > @@ -938,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
> >
> > fsck_walk_options.walk = mark_object;
> > fsck_obj_options.walk = mark_used;
> > - fsck_obj_options.error_func = fsck_error_func;
> > + fsck_obj_options.error_func = fsck_objects_error_func;
> > if (check_strict)
> > fsck_obj_options.strict = 1;
> >
> [snip]
> > @@ -166,7 +171,7 @@ struct fsck_options {
> > .gitmodules_done = OIDSET_INIT, \
> > .gitattributes_found = OIDSET_INIT, \
> > .gitattributes_done = OIDSET_INIT, \
> > - .error_func = fsck_error_cb_print_missing_gitmodules, \
> > + .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> > }
> >
> > /* descend in all linked child objects
> > @@ -209,6 +214,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
> > */
> > int fsck_finish(struct fsck_options *options);
> >
> > +__attribute__((format (printf, 5, 6)))
> > +int fsck_refs_report(struct fsck_options *options,
> > + const struct object_id *oid,
> > + const char *checked_ref_name,
> > + enum fsck_msg_id msg_id,
> > + const char *fmt, ...);
> > +
>
> I think I mentioned this in a previous reply, but it was missed. Not a
> big deal, but it might be nice to document `int fsck_refs_report()`
> here.
>
I will improve this in the next version.
> -Justin
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
2024-07-09 12:34 ` [GSoC][PATCH v9 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-07-09 12:35 ` [GSoC][PATCH v9 2/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-09 12:35 ` shejialuo
2024-07-09 21:29 ` Justin Tobler
2024-07-09 12:35 ` [GSoC][PATCH v9 4/9] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
9 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Add refs-related options to "fsck_options" and create a refs-specific
"error_func" callback, "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.
Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 23 +++++++++++++++++++++++
fsck.h | 15 +++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/fsck.c b/fsck.c
index e1819964e3..c5c7e8454f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1252,6 +1252,29 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ static struct strbuf sb = STRBUF_INIT;
+
+ strbuf_reset(&sb);
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN) {
+ warning("%s: %s", sb.buf, message);
+ return 0;
+ }
+ error("%s: %s", sb.buf, message);
+ return 1;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 8ce48395f6..ff52913494 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose_refs:1;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
@@ -173,6 +181,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function
2024-07-09 12:35 ` [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function shejialuo
@ 2024-07-09 21:29 ` Justin Tobler
2024-07-09 21:40 ` Eric Sunshine
2024-07-10 12:28 ` shejialuo
0 siblings, 2 replies; 282+ messages in thread
From: Justin Tobler @ 2024-07-09 21:29 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On 24/07/09 08:35PM, shejialuo wrote:
> Add refs-related options to the "fsck_options", create refs-specific
> "error_func" callback "fsck_refs_error_function".
>
> "fsck_refs_error_function" will use the "oid" parameter. When the caller
> passes the oid, it will use "oid_to_hex" to get the corresponding hex
> value to report to the caller.
Out of curiosity, under what circumstances would the caller want to
also pass the oid? Would it simply be to optionally provide additional
context?
>
> Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
> macros to create refs options easily.
>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> fsck.c | 23 +++++++++++++++++++++++
> fsck.h | 15 +++++++++++++++
> 2 files changed, 38 insertions(+)
>
> diff --git a/fsck.c b/fsck.c
> index e1819964e3..c5c7e8454f 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -1252,6 +1252,29 @@ int fsck_objects_error_function(struct fsck_options *o,
> return 1;
> }
>
> +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> + const struct object_id *oid,
> + enum object_type object_type UNUSED,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> +{
> + static struct strbuf sb = STRBUF_INIT;
> +
> + strbuf_reset(&sb);
Naive question: is there a reason to reset `sb` immediately after
`STRBUF_INIT`? My understanding is that because we initialize the
buffer, the other fields should also be zeroed. If so, resetting the
buffer here seems redundant.
> + strbuf_addstr(&sb, checked_ref_name);
> + if (oid)
> + strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
> +
> + if (msg_type == FSCK_WARN) {
> + warning("%s: %s", sb.buf, message);
> + return 0;
> + }
> + error("%s: %s", sb.buf, message);
> + return 1;
> +}
> +
> static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
> enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
> struct fsck_options *options, const char *blob_type)
> diff --git a/fsck.h b/fsck.h
> index 8ce48395f6..ff52913494 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -135,11 +135,19 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
> enum fsck_msg_type msg_type,
> enum fsck_msg_id msg_id,
> const char *message);
> +int fsck_refs_error_function(struct fsck_options *options,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const char *checked_ref_name,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id,
> + const char *message);
>
> struct fsck_options {
> fsck_walk_func walk;
> fsck_error error_func;
> unsigned strict:1;
> + unsigned verbose_refs:1;
What is the purpose of adding `verbose_refs` in this patch? At this
point, I'm not seeing it used. If there is a reason to be included in
this patch, it might be nice to mention in the commit message.
> enum fsck_msg_type *msg_type;
> struct oidset skip_oids;
> struct oidset gitmodules_found;
> @@ -173,6 +181,13 @@ struct fsck_options {
> .gitattributes_done = OIDSET_INIT, \
> .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> }
> +#define FSCK_REFS_OPTIONS_DEFAULT { \
> + .error_func = fsck_refs_error_function, \
> +}
> +#define FSCK_REFS_OPTIONS_STRICT { \
> + .strict = 1, \
> + .error_func = fsck_refs_error_function, \
> +}
>
> /* descend in all linked child objects
> * the return value is:
> --
> 2.45.2
>
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function
2024-07-09 21:29 ` Justin Tobler
@ 2024-07-09 21:40 ` Eric Sunshine
2024-07-10 12:13 ` shejialuo
2024-07-10 12:28 ` shejialuo
1 sibling, 1 reply; 282+ messages in thread
From: Eric Sunshine @ 2024-07-09 21:40 UTC (permalink / raw)
To: Justin Tobler
Cc: shejialuo, git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano
On Tue, Jul 9, 2024 at 5:30 PM Justin Tobler <jltobler@gmail.com> wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> > + const struct object_id *oid,
> > + enum object_type object_type UNUSED,
> > + const char *checked_ref_name,
> > + enum fsck_msg_type msg_type,
> > + enum fsck_msg_id msg_id UNUSED,
> > + const char *message)
> > +{
> > + static struct strbuf sb = STRBUF_INIT;
> > +
> > + strbuf_reset(&sb);
>
> Naive question, is there reason to reset `sb` immediately after
> `STRBUF_INIT`? My understanding is that because we initialize the
> buffer, the other fields should also be zeroed. If so, resetting the
> buffer here seems redundant.
This particular strbuf is static, so it needs to be cleared each time
the function is called.
The cover letter provides an argument for making it static: that this
will be called often, and we don't want to make a lot of repeated
allocations. Personally, I find that argument rather weak. Why would
an error function be called frequently? Is this really a hot path that
needs to worry about a few extra allocations? Also, importantly, every
static added makes the code harder to "libify", so making it static
requires a very strong reason, but there doesn't seem to be such a
reason in this case.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function
2024-07-09 21:40 ` Eric Sunshine
@ 2024-07-10 12:13 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 12:13 UTC (permalink / raw)
To: Eric Sunshine
Cc: Justin Tobler, git, Patrick Steinhardt, Karthik Nayak,
Junio C Hamano
On Tue, Jul 09, 2024 at 05:40:08PM -0400, Eric Sunshine wrote:
> On Tue, Jul 9, 2024 at 5:30 PM Justin Tobler <jltobler@gmail.com> wrote:
> > On 24/07/09 08:35PM, shejialuo wrote:
> > > +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> > > + const struct object_id *oid,
> > > + enum object_type object_type UNUSED,
> > > + const char *checked_ref_name,
> > > + enum fsck_msg_type msg_type,
> > > + enum fsck_msg_id msg_id UNUSED,
> > > + const char *message)
> > > +{
> > > + static struct strbuf sb = STRBUF_INIT;
> > > +
> > > + strbuf_reset(&sb);
> >
> > Naive question, is there reason to reset `sb` immediately after
> > `STRBUF_INIT`? My understanding is that because we initialize the
> > buffer, the other fields should also be zeroed. If so, resetting the
> > buffer here seems redundant.
>
> This particular strbuf is static, so it needs to be cleared each time
> the function is called.
>
> The cover letter provides an argument for making it static: that this
> will be called often, and we don't want to make a lot of repeated
> allocations. Personally, I find that argument rather weak. Why would
> an error function be called frequently? Is this really a hot path that
> needs to worry about a few extra allocations? Also, importantly, every
> static added makes the code harder to "libify", so making it static
> requires a very strong reason, but there doesn't seem to be such a
> reason in this case.
I didn't consider the libification issue. I just wanted to reduce some
memory allocations. I will change this in the next version.
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function
2024-07-09 21:29 ` Justin Tobler
2024-07-09 21:40 ` Eric Sunshine
@ 2024-07-10 12:28 ` shejialuo
1 sibling, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 12:28 UTC (permalink / raw)
To: Justin Tobler
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On Tue, Jul 09, 2024 at 04:29:24PM -0500, Justin Tobler wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > Add refs-related options to the "fsck_options", create refs-specific
> > "error_func" callback "fsck_refs_error_function".
> >
> > "fsck_refs_error_function" will use the "oid" parameter. When the caller
> > passes the oid, it will use "oid_to_hex" to get the corresponding hex
> > value to report to the caller.
>
> Out of curiousity, under what circumstances would the caller want to
> also pass the oid? Would it simply be to optionally provide additional
> context?
>
Because when we check the refs, we will use "parse_loose_ref_contents"
here to check the ref contents. Below is the prototype:
int parse_loose_ref_contents(const char *buf,
struct object_id *oid,
...)
So we can get an oid here. However, we don't know the type of the oid.
It may not be a commit object but rather a tag object. And I want to
provide more flexibility for the caller. When the caller passes the oid,
the message could be the following:
ref_name -> (oid) : fsck_error_type: user-passed message.
So, actually we have provided additional context for the caller. From
another perspective, the object check needs the "oid" parameter, we
cannot remove it from the callback "error_func" prototype. So why not
just reuse this parameter? It truly provides the caller more flexibility
without big changes.
> > struct fsck_options {
> > fsck_walk_func walk;
> > fsck_error error_func;
> > unsigned strict:1;
> > + unsigned verbose_refs:1;
>
> What is the purpose of adding `verbose_refs` in this patch? At this
> point, I'm not seeing it used. If there is a reason to be included in
> this patch, it might be nice to mention in the commit message.
>
The reason is that fsck builtin handles the object check but we want to
use "git refs verify" command to handle ref check and we put all the
real functionality into each file backend. And there is only one entry
point in the "git refs verify" command. So we need to use "fsck_options"
as the parameter to maintain this state across the ref checks.
Actually, "git-fsck(1)" maintains a global "verbose" variable. I think I
should not add this option in this patch, as it may cause a lot of
confusion here. I will improve this in the next version.
> > enum fsck_msg_type *msg_type;
> > struct oidset skip_oids;
> > struct oidset gitmodules_found;
> > @@ -173,6 +181,13 @@ struct fsck_options {
> > .gitattributes_done = OIDSET_INIT, \
> > .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
> > }
> > +#define FSCK_REFS_OPTIONS_DEFAULT { \
> > + .error_func = fsck_refs_error_function, \
> > +}
> > +#define FSCK_REFS_OPTIONS_STRICT { \
> > + .strict = 1, \
> > + .error_func = fsck_refs_error_function, \
> > +}
> >
> > /* descend in all linked child objects
> > * the return value is:
> > --
> > 2.45.2
> >
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 4/9] refs: set up ref consistency check infrastructure
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-07-09 12:35 ` [GSoC][PATCH v9 3/9] fsck: add refs-related options and error report function shejialuo
@ 2024-07-09 12:35 ` shejialuo
2024-07-09 22:11 ` Justin Tobler
2024-07-09 12:35 ` [GSoC][PATCH v9 5/9] builtin/refs: add verify subcommand shejialuo
` (5 subsequent siblings)
9 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The interfaces defined in `ref_storage_be` are carefully structured
semantically. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". It does not fit into any of
the five categories above, so explicitly add a blank line to set it
apart from the others.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..d89eeda8ef 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3445,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 4/9] refs: set up ref consistency check infrastructure
2024-07-09 12:35 ` [GSoC][PATCH v9 4/9] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-09 22:11 ` Justin Tobler
2024-07-10 12:29 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Justin Tobler @ 2024-07-09 22:11 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On 24/07/09 08:35PM, shejialuo wrote:
> The interfaces defined in the `ref_storage_be` are carefully structured
> in semantic. It's organized as the five parts:
>
> 1. The name and the initialization interfaces.
> 2. The ref transaction interfaces.
> 3. The ref internal interfaces (pack, rename and copy).
> 4. The ref filesystem interfaces.
> 5. The reflog related interfaces.
>
> To keep consistent with the git-fsck(1), add a new interface named
> "fsck_refs_fn" to the end of "ref_storage_be". This semantic cannot be
> grouped into any above five categories. Explicitly add blank line to
> make it different from others.
In this patch we are not only adding the `fsck` infrastructure to
`ref_storage_be`, but also wiring what are essentially no-op functions
to the various implementations. It might be good to mention this in the
commit message so it is better understood that future patches will
actually provide proper function implementations.
-Justin
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 4/9] refs: set up ref consistency check infrastructure
2024-07-09 22:11 ` Justin Tobler
@ 2024-07-10 12:29 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 12:29 UTC (permalink / raw)
To: Justin Tobler
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On Tue, Jul 09, 2024 at 05:11:36PM -0500, Justin Tobler wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > The interfaces defined in the `ref_storage_be` are carefully structured
> > in semantic. It's organized as the five parts:
> >
> > 1. The name and the initialization interfaces.
> > 2. The ref transaction interfaces.
> > 3. The ref internal interfaces (pack, rename and copy).
> > 4. The ref filesystem interfaces.
> > 5. The reflog related interfaces.
> >
> > To keep consistent with the git-fsck(1), add a new interface named
> > "fsck_refs_fn" to the end of "ref_storage_be". This semantic cannot be
> > grouped into any above five categories. Explicitly add blank line to
> > make it different from others.
>
> In this patch we are not only adding the `fsck` infrastructure to
> `ref_storage_be`, but also wiring what are essentially no-op functions
> to the various implementations. It might be good to mention this in the
> commit message so it is better understood that future patches will
> actually provide proper function implementations.
>
Yes, I agree. I will improve this in the next version.
> -Justin
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 5/9] builtin/refs: add verify subcommand
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-07-09 12:35 ` [GSoC][PATCH v9 4/9] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-09 12:35 ` shejialuo
2024-07-09 22:30 ` Justin Tobler
2024-07-09 12:36 ` [GSoC][PATCH v9 6/9] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
9 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:35 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter checking: every WARN severity for the `Fsck Messages`
+ is treated as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..baa96f5b3f 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret = 0;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose_refs = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "oid_skiplist"
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.skip_oids);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 5/9] builtin/refs: add verify subcommand
2024-07-09 12:35 ` [GSoC][PATCH v9 5/9] builtin/refs: add verify subcommand shejialuo
@ 2024-07-09 22:30 ` Justin Tobler
2024-07-10 12:32 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Justin Tobler @ 2024-07-09 22:30 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On 24/07/09 08:35PM, shejialuo wrote:
> Introduce a new subcommand "verify" in git-refs(1) to allow the user to
> check the reference database consistency.
The next patch in the series uses `git-refs verify` when running
git-fsck(1). It might be worth mentioning here that this is also
intended as the entry point for fscking refs.
>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
[snip]
> @@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
> return err;
> }
>
> +static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
> +{
> + struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
> + const char * const verify_usage[] = {
> + REFS_VERIFY_USAGE,
> + NULL,
> + };
> + unsigned int verbose = 0, strict = 0;
> + struct option options[] = {
> + OPT__VERBOSE(&verbose, N_("be verbose")),
> + OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
> + OPT_END(),
> + };
> + int ret = 0;
nit: Being that we always assign a value to `ret`, defaulting here to
zero is redundant.
> +
> + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> + if (argc)
> + usage(_("too many arguments"));
> +
> + if (verbose)
> + fsck_refs_options.verbose_refs = 1;
> + if (strict)
> + fsck_refs_options.strict = 1;
> +
> + git_config(git_fsck_config, &fsck_refs_options);
> + prepare_repo_settings(the_repository);
> +
> + ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
> +
> + /*
> + * Explicitly free the allocated array and "oid_skiplist"
s/oid_skiplist/skip_oids
I think we forgot to update this comment after the variable was renamed
in a previous patch version.
> + */
> + free(fsck_refs_options.msg_type);
> + oidset_clear(&fsck_refs_options.skip_oids);
> + return ret;
> +}
> +
> int cmd_refs(int argc, const char **argv, const char *prefix)
> {
> const char * const refs_usage[] = {
> REFS_MIGRATE_USAGE,
> + REFS_VERIFY_USAGE,
> NULL,
> };
> parse_opt_subcommand_fn *fn = NULL;
> struct option opts[] = {
> OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
> + OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
> OPT_END(),
> };
>
> --
> 2.45.2
>
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v9 5/9] builtin/refs: add verify subcommand
2024-07-09 22:30 ` Justin Tobler
@ 2024-07-10 12:32 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 12:32 UTC (permalink / raw)
To: Justin Tobler
Cc: git, Patrick Steinhardt, Karthik Nayak, Junio C Hamano,
Eric Sunshine
On Tue, Jul 09, 2024 at 05:30:42PM -0500, Justin Tobler wrote:
> On 24/07/09 08:35PM, shejialuo wrote:
> > Introduce a new subcommand "verify" in git-refs(1) to allow the user to
> > check the reference database consistency.
>
> The next patch in the series uses `git-refs verify` to when running
> git-fsck(1). It might be worth mentioning here that this is also
> intended as the entry point for fscking refs.
>
Yes, I will improve the commit message in the next version.
> >
> > Mentored-by: Patrick Steinhardt <ps@pks.im>
> > Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> > Signed-off-by: shejialuo <shejialuo@gmail.com>
> [snip]
> > @@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
> > return err;
> > }
> >
> > +static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
> > +{
> > + struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
> > + const char * const verify_usage[] = {
> > + REFS_VERIFY_USAGE,
> > + NULL,
> > + };
> > + unsigned int verbose = 0, strict = 0;
> > + struct option options[] = {
> > + OPT__VERBOSE(&verbose, N_("be verbose")),
> > + OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
> > + OPT_END(),
> > + };
> > + int ret = 0;
>
> nit: Being that we always assign a value to `ret`, defaulting here to
> zero is redundant.
>
I agree.
> > +
> > + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> > + if (argc)
> > + usage(_("too many arguments"));
> > +
> > + if (verbose)
> > + fsck_refs_options.verbose_refs = 1;
> > + if (strict)
> > + fsck_refs_options.strict = 1;
> > +
> > + git_config(git_fsck_config, &fsck_refs_options);
> > + prepare_repo_settings(the_repository);
> > +
> > + ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
> > +
> > + /*
> > + * Explicitly free the allocated array and "oid_skiplist"
>
> s/oid_skiplist/skip_oids
>
> I think we forgot to update this comment after the variable was renamed
> in a previous patch version.
>
Yes, thanks for the reminder. I simply forgot to update it here.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 6/9] builtin/fsck: add `git-refs verify` child process
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-07-09 12:35 ` [GSoC][PATCH v9 5/9] builtin/refs: add verify subcommand shejialuo
@ 2024-07-09 12:36 ` shejialuo
2024-07-09 12:36 ` [GSoC][PATCH v9 7/9] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index de34538c4f..ec3357722c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -897,6 +897,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1066,6 +1081,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 7/9] files-backend: add unified interface for refs scanning
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-07-09 12:36 ` [GSoC][PATCH v9 6/9] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-09 12:36 ` shejialuo
2024-07-09 12:36 ` [GSoC][PATCH v9 8/9] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d89eeda8ef..84acb58782 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,6 +3409,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3415,7 +3488,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 8/9] fsck: add ref name check for files backend
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-07-09 12:36 ` [GSoC][PATCH v9 7/9] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-09 12:36 ` shejialuo
2024-07-09 12:36 ` [GSoC][PATCH v9 9/9] fsck: add ref content " shejialuo
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully
check refs with badly formatted names, such as a standalone "@" or a name
ending with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new unit test to verify the functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index ff52913494..03825e86b1 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 84acb58782..69a76048d3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3489,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v9 9/9] fsck: add ref content check for files backend
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-07-09 12:36 ` [GSoC][PATCH v9 8/9] fsck: add ref name check for files backend shejialuo
@ 2024-07-09 12:36 ` shejialuo
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-09 12:36 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert a
relative path to an absolute path. Then use "skip_prefix" to make it align
with the symref "referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path is a wrong type in filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index 03825e86b1..73592c9d2a 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 69a76048d3..d98ef45403 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For a symbolic ref, "pointee_name"
+ * would be the content after "ref:". For a symbolic link, "pointee_name" would
+ * be the path relative to "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs can have trailing garbage. It should
+ * be reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 00/10] ref consistency check infra setup
2024-07-09 12:32 ` [GSoC][PATCH v9 0/9] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-07-09 12:36 ` [GSoC][PATCH v9 9/9] fsck: add ref content " shejialuo
@ 2024-07-10 14:43 ` shejialuo
2024-07-10 14:46 ` [GSoC][PATCH v10 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
` (10 more replies)
9 siblings, 11 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:43 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version handles the following problems:
1. Following Justin's advice, add a new commit to rename the
objects-related fsck error functions to make the series cleaner.
2. Add a simple documentation for "fsck_refs_report".
3. The previous implementation of "fsck_refs_error_function" used
"static struct strbuf". As Eric said, it's a bad idea to use "static"
because "fsck_refs_error_function" is called infrequently, and it would
make the code harder to "libify". I didn't consider this. So in this
version, use a plain "struct strbuf" instead and add the corresponding
memory-free operation.
4. I should not have added the "verbose_refs" option in the "fsck: add
refs-related error report function" commit. This option is now added in
the "builtin/refs" commit to avoid confusion.
5. Enhance the commit message for "refs: set up ref consistency check
infrastructure" to mention that we have added placeholder functions.
6. Fix the stale "oid_skiplist" mention in a comment, which was
overlooked in the previous version.
Thanks to every reviewer.
CI: https://github.com/shejialuo/git/pull/8
Thanks,
Jialuo
shejialuo (10):
fsck: rename "skiplist" to "skip_oids"
fsck: rename objects-related fsck error functions
fsck: add a unified interface for reporting fsck messages
fsck: add refs-related error report function
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand and verbose_refs for
"fsck_options"
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 32 ++++-
builtin/mktag.c | 1 +
builtin/refs.c | 44 ++++++
fsck.c | 109 ++++++++++++---
fsck.h | 66 ++++++---
object-file.c | 11 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 255 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
16 files changed, 750 insertions(+), 57 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v9:
1: e044f933de = 1: e044f933de fsck: rename "skiplist" to "skip_oids"
-: ---------- > 2: 73a7c53a23 fsck: rename objects-related fsck error functions
2: daaf3d0ffe ! 3: df4837e960 fsck: add a unified interface for reporting fsck messages
@@ Commit message
## builtin/fsck.c ##
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
- return -1;
- }
-
--static int fsck_error_func(struct fsck_options *o UNUSED,
-- const struct object_id *oid,
-- enum object_type object_type,
-- enum fsck_msg_type msg_type,
-- enum fsck_msg_id msg_id UNUSED,
-- const char *message)
-+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
-+ const struct object_id *oid,
-+ enum object_type object_type,
+ static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name UNUSED,
-+ enum fsck_msg_type msg_type,
-+ enum fsck_msg_id msg_id UNUSED,
-+ const char *message)
- {
- switch (msg_type) {
- case FSCK_WARN:
-@@ builtin/fsck.c: int cmd_fsck(int argc, const char **argv, const char *prefix)
-
- fsck_walk_options.walk = mark_object;
- fsck_obj_options.walk = mark_used;
-- fsck_obj_options.error_func = fsck_error_func;
-+ fsck_obj_options.error_func = fsck_objects_error_func;
- if (check_strict)
- fsck_obj_options.strict = 1;
-
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
## builtin/mktag.c ##
@@ builtin/mktag.c: static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
@@ fsck.c: static int report(struct fsck_options *options,
{
if (!options->object_names)
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
- type);
- }
-
--int fsck_error_function(struct fsck_options *o,
-- const struct object_id *oid,
-- enum object_type object_type UNUSED,
-- enum fsck_msg_type msg_type,
-- enum fsck_msg_id msg_id UNUSED,
-- const char *message)
-+int fsck_objects_error_function(struct fsck_options *o,
-+ const struct object_id *oid,
-+ enum object_type object_type UNUSED,
+ int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
-+ enum fsck_msg_type msg_type,
-+ enum fsck_msg_id msg_id UNUSED,
-+ const char *message)
- {
- if (msg_type == FSCK_WARN) {
- warning("object %s: %s", fsck_describe_object(o, oid), message);
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
- * Custom error callbacks that are used in more than one place.
- */
-
--int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
-- const struct object_id *oid,
-- enum object_type object_type,
-- enum fsck_msg_type msg_type,
-- enum fsck_msg_id msg_id,
-- const char *message)
-+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
-+ const struct object_id *oid,
-+ enum object_type object_type,
+ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
-+ enum fsck_msg_type msg_type,
-+ enum fsck_msg_id msg_id,
-+ const char *message)
- {
- if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
+@@ fsck.c: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
puts(oid_to_hex(oid));
return 0;
}
-- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+- return fsck_objects_error_function(o, oid, object_type,
+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
-+ msg_type, msg_id, message);
+ msg_type, msg_id, message);
}
## fsck.h ##
@@ fsck.h: int is_valid_msg_type(const char *msg_id, const char *msg_type);
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
--int fsck_error_function(struct fsck_options *o,
-- const struct object_id *oid, enum object_type object_type,
-- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
-- const char *message);
--int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
-- const struct object_id *oid,
-- enum object_type object_type,
-- enum fsck_msg_type msg_type,
-- enum fsck_msg_id msg_id,
-- const char *message);
-+int fsck_objects_error_function(struct fsck_options *o,
-+ const struct object_id *oid, enum object_type object_type,
+ int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
-+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
-+ const char *message);
-+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
-+ const struct object_id *oid,
-+ enum object_type object_type,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
-+ enum fsck_msg_type msg_type,
-+ enum fsck_msg_id msg_id,
-+ const char *message);
-
- struct fsck_options {
- fsck_walk_func walk;
-@@ fsck.h: struct fsck_options {
- .gitmodules_done = OIDSET_INIT, \
- .gitattributes_found = OIDSET_INIT, \
- .gitattributes_done = OIDSET_INIT, \
-- .error_func = fsck_error_function \
-+ .error_func = fsck_objects_error_function \
- }
- #define FSCK_OPTIONS_STRICT { \
- .strict = 1, \
-@@ fsck.h: struct fsck_options {
- .gitmodules_done = OIDSET_INIT, \
- .gitattributes_found = OIDSET_INIT, \
- .gitattributes_done = OIDSET_INIT, \
-- .error_func = fsck_error_function, \
-+ .error_func = fsck_objects_error_function, \
- }
- #define FSCK_OPTIONS_MISSING_GITMODULES { \
- .strict = 1, \
-@@ fsck.h: struct fsck_options {
- .gitmodules_done = OIDSET_INIT, \
- .gitattributes_found = OIDSET_INIT, \
- .gitattributes_done = OIDSET_INIT, \
-- .error_func = fsck_error_cb_print_missing_gitmodules, \
-+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
- }
-
- /* descend in all linked child objects
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
@@ fsck.h: int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
++/*
++ * Report an error or warning for refs.
++ */
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
3: 40da85ae30 ! 4: c8eb01c987 fsck: add refs-related options and error report function
@@ Metadata
Author: shejialuo <shejialuo@gmail.com>
## Commit message ##
- fsck: add refs-related options and error report function
+ fsck: add refs-related error report function
Add refs-related options to the "fsck_options", create refs-specific
"error_func" callback "fsck_refs_error_function".
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
-+ static struct strbuf sb = STRBUF_INIT;
++ struct strbuf sb = STRBUF_INIT;
++ int ret = 0;
+
-+ strbuf_reset(&sb);
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
-+ if (msg_type == FSCK_WARN) {
++ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
-+ return 0;
-+ }
-+ error("%s: %s", sb.buf, message);
-+ return 1;
++ else
++ ret = error("%s: %s", sb.buf, message);
++
++ strbuf_release(&sb);
++ return ret;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *
struct fsck_options {
fsck_walk_func walk;
- fsck_error error_func;
- unsigned strict:1;
-+ unsigned verbose_refs:1;
- enum fsck_msg_type *msg_type;
- struct oidset skip_oids;
- struct oidset gitmodules_found;
@@ fsck.h: struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
4: a38ea1b117 ! 5: e4085df496 refs: set up ref consistency check infrastructure
@@ Commit message
grouped into any above five categories. Explicitly add blank line to
make it different from others.
+ Last, implement placeholder functions for each ref backends.
+
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
5: 8320f56e0b ! 6: 497f224bed builtin/refs: add verify subcommand
@@ Metadata
Author: shejialuo <shejialuo@gmail.com>
## Commit message ##
- builtin/refs: add verify subcommand
+ builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
- check the reference database consistency.
+ check the reference database consistency and also this subcommand will
+ be used as the entry point of checking refs for "git-fsck(1)". Last, add
+ "verbose_refs" field into "fsck_options" to indicate whether we should
+ print verbose messages when checking refs consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
-+ int ret = 0;
++ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
-+ * Explicitly free the allocated array and "oid_skiplist"
++ * Explicitly free the allocated array and "skip_oids" set
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.skip_oids);
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
OPT_END(),
};
+
+ ## fsck.h ##
+@@ fsck.h: struct fsck_options {
+ fsck_walk_func walk;
+ fsck_error error_func;
+ unsigned strict:1;
++ unsigned verbose_refs:1;
+ enum fsck_msg_type *msg_type;
+ struct oidset skip_oids;
+ struct oidset gitmodules_found;
6: 6614a06ef5 = 7: 86a14c7b43 builtin/fsck: add `git-refs verify` child process
7: 928cc96396 = 8: daedb80b47 files-backend: add unified interface for refs scanning
8: 4d50d4932f = 9: c36d588e4f fsck: add ref name check for files backend
9: 7edb810819 = 10: 521e0d9ca3 fsck: add ref content check for files backend
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 01/10] fsck: rename "skiplist" to "skip_oids"
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
@ 2024-07-10 14:46 ` shejialuo
2024-07-10 14:46 ` [GSoC][PATCH v10 02/10] fsck: rename objects-related fsck error functions shejialuo
` (9 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:46 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency check, the "skiplist" name is too
general which will make the caller think "skiplist" is related to both
the refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 02/10] fsck: rename objects-related fsck error functions
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
2024-07-10 14:46 ` [GSoC][PATCH v10 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-07-10 14:46 ` shejialuo
2024-07-10 14:47 ` [GSoC][PATCH v10 03/10] fsck: add a unified interface for reporting fsck messages shejialuo
` (8 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:46 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of objects-related fsck error functions are general. It's OK
when there is only object database check. However, we are going to
introduce refs database check. In order to avoid ambiguity, rename
objects-related fsck error functions to explicitly indicate these
functions are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 14 +++++++-------
fsck.c | 27 ++++++++++++++-------------
fsck.h | 26 +++++++++++++-------------
3 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..6d86bbe1e9 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,12 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +938,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index 3f32441492..0aaff7f635 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1200,12 +1200,12 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1303,16 +1303,17 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..41ebebbb59 100644
--- a/fsck.h
+++ b/fsck.h
@@ -120,16 +120,16 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -150,7 +150,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -158,7 +158,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -166,7 +166,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 03/10] fsck: add a unified interface for reporting fsck messages
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
2024-07-10 14:46 ` [GSoC][PATCH v10 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-07-10 14:46 ` [GSoC][PATCH v10 02/10] fsck: rename objects-related fsck error functions shejialuo
@ 2024-07-10 14:47 ` shejialuo
2024-07-10 21:04 ` Junio C Hamano
2024-07-10 14:47 ` [GSoC][PATCH v10 04/10] fsck: add refs-related error report function shejialuo
` (7 subsequent siblings)
10 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:47 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking fsck
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
can report either objects or refs, create a new function "vfsck_report"
by adding "checked_ref_name" parameter following the "report" prototype.
Instead of using "...", provide "va_list" to allow more flexibility.
Like "report", the "vfsck_report" function will use "error_func"
registered in "fsck_options" to report customized messages. Change
"error_func" prototype to align with the new "vfsck_report".
Then, change "report" function to use "vfsck_report" to report objects
related messages. Add a new function called "fsck_refs_report" to use
"vfsck_report" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 1 +
builtin/mktag.c | 1 +
fsck.c | 56 +++++++++++++++++++++++++++++++++++++++++--------
fsck.h | 17 ++++++++++++++-
object-file.c | 11 +++++-----
5 files changed, 71 insertions(+), 15 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 6d86bbe1e9..de34538c4f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -92,6 +92,7 @@ static int objerror(struct object *obj, const char *err)
static int fsck_objects_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid,
enum object_type object_type,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..42f945c584 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 0aaff7f635..e1819964e3 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int vfsck_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, oid, object_type, checked_ref_name,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -260,6 +266,36 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, object_type, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
+
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = vfsck_report(options, oid, OBJ_NONE,
+ checked_ref_name, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1203,6 +1239,7 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
int fsck_objects_error_function(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type UNUSED,
+ const char *checked_ref_name UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
@@ -1306,6 +1343,7 @@ int git_fsck_config(const char *var, const char *value,
int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
@@ -1314,6 +1352,6 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
puts(oid_to_hex(oid));
return 0;
}
- return fsck_objects_error_function(o, oid, object_type,
+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 41ebebbb59..f88e5faa94 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,19 +114,24 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_objects_error_function(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
+ const char *checked_ref_name,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
@@ -209,6 +214,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *checked_ref_name,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 065103be3e..d2c6427935 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *ref_checked_name UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v10 03/10] fsck: add a unified interface for reporting fsck messages
2024-07-10 14:47 ` [GSoC][PATCH v10 03/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-10 21:04 ` Junio C Hamano
2024-07-11 11:59 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-07-10 21:04 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine,
Justin Tobler
shejialuo <shejialuo@gmail.com> writes:
> The static function "report" provided by "fsck.c" aims at checking fsck
> error type and calling the callback "error_func" to report the message.
> However, "report" function is only related to object database which
> cannot be reused for refs. In order to provide a unified interface which
> can report either objects or refs, create a new function "vfsck_report"
> by adding "checked_ref_name" parameter following the "report" prototype.
> Instead of using "...", provide "va_list" to allow more flexibility.
Like strbuf_vinsertf(), it is a good idea to have "v" in the name of
a function that takes va_list, but fsck_vreport() would probably be
a better name here. Arguably, the original report() is misnamed (as
a printf-like function that takes format string, it probably would
have wanted to be reportf() instead), but unless we are fixing that
at the same time, calling this fsck_vreportf() would probably be too
much. Consistently misnaming it by omitting the final "f" would be
fine.
At this step it is still not clear if the previous step was really
needed; you have this "v" thing that is designed to be usable by
both reporting issues around objects and issues around refs, but we
will hopefully see why when we read later patches.
> diff --git a/object-file.c b/object-file.c
> index 065103be3e..d2c6427935 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
> * give more context.
> */
> static int hash_format_check_report(struct fsck_options *opts UNUSED,
> - const struct object_id *oid UNUSED,
> - enum object_type object_type UNUSED,
> - enum fsck_msg_type msg_type UNUSED,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> + const struct object_id *oid UNUSED,
> + enum object_type object_type UNUSED,
> + const char *ref_checked_name UNUSED,
> + enum fsck_msg_type msg_type UNUSED,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
That is somewhat annoying reindentation. What happened here?
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v10 03/10] fsck: add a unified interface for reporting fsck messages
2024-07-10 21:04 ` Junio C Hamano
@ 2024-07-11 11:59 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-11 11:59 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine,
Justin Tobler
On Wed, Jul 10, 2024 at 02:04:15PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > The static function "report" provided by "fsck.c" aims at checking fsck
> > error type and calling the callback "error_func" to report the message.
> > However, "report" function is only related to object database which
> > cannot be reused for refs. In order to provide a unified interface which
> > can report either objects or refs, create a new function "vfsck_report"
> > by adding "checked_ref_name" parameter following the "report" prototype.
> > Instead of using "...", provide "va_list" to allow more flexibility.
>
> Like strbuf_vinsertf(), it is a good idea to have "v" in the name of
> a function that takes va_list, but fsck_vreport() would probably be
> a better name here. Arguably, the original report() is misnamed (as
> a printf-like function that takes format string, it probably would
> have wanted to be reportf() instead), but unless we are fixing that
> at the same time, calling this fsck_vreportf() would probably be too
> much. Consistently misnaming it by omitting the final "f" would be
> fine.
>
Yes, I will rename it to "fsck_vreport".
> At this step it is still not clear if the previous step was really
> needed; you have this "v" thing that is designed to be usable by
> both reporting issues around objects and issues around refs, but we
> will hopefully see why when we read later patches.
From my perspective, I think we should put the previous commit after
this commit. I agree with you that if we put it later, it will be
much clearer and easier to understand.
> > diff --git a/object-file.c b/object-file.c
> > index 065103be3e..d2c6427935 100644
> > --- a/object-file.c
> > +++ b/object-file.c
> > @@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
> > * give more context.
> > */
> > static int hash_format_check_report(struct fsck_options *opts UNUSED,
> > - const struct object_id *oid UNUSED,
> > - enum object_type object_type UNUSED,
> > - enum fsck_msg_type msg_type UNUSED,
> > - enum fsck_msg_id msg_id UNUSED,
> > - const char *message)
> > + const struct object_id *oid UNUSED,
> > + enum object_type object_type UNUSED,
> > + const char *ref_checked_name UNUSED,
> > + enum fsck_msg_type msg_type UNUSED,
> > + enum fsck_msg_id msg_id UNUSED,
> > + const char *message)
>
> That is somewhat annoying reindentation. What happened here?
The original code's indentation is broken. There is one extra space in
each line, like the following:
static int hash_format_check_report(struct fsck_options *opts UNUSED,
const struct object_id *oid UNUSED)
...
I think I could fix this by the way.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 04/10] fsck: add refs-related error report function
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-07-10 14:47 ` [GSoC][PATCH v10 03/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-10 14:47 ` shejialuo
2024-07-10 14:47 ` [GSoC][PATCH v10 05/10] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:47 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Add refs-related options to the "fsck_options", create refs-specific
"error_func" callback "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.
Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 24 ++++++++++++++++++++++++
fsck.h | 14 ++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/fsck.c b/fsck.c
index e1819964e3..f14ac533b0 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1252,6 +1252,30 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ strbuf_addstr(&sb, checked_ref_name);
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index f88e5faa94..fe5d4d2ad9 100644
--- a/fsck.h
+++ b/fsck.h
@@ -135,6 +135,13 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *checked_ref_name,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -173,6 +180,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 05/10] refs: set up ref consistency check infrastructure
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-07-10 14:47 ` [GSoC][PATCH v10 04/10] fsck: add refs-related error report function shejialuo
@ 2024-07-10 14:47 ` shejialuo
2024-07-10 14:47 ` [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options" shejialuo
` (5 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:47 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The interfaces defined in the `ref_storage_be` are carefully structured
by semantics. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To keep consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". This interface cannot be
grouped into any of the above five categories. Explicitly add a blank
line to set it apart from the others.
Last, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..d89eeda8ef 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3445,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-07-10 14:47 ` [GSoC][PATCH v10 05/10] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-10 14:47 ` shejialuo
2024-07-10 21:31 ` Junio C Hamano
2024-07-10 14:48 ` [GSoC][PATCH v10 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
10 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:47 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency and also this subcommand will
be used as the entry point of checking refs for "git-fsck(1)". Last, add
"verbose_refs" field into "fsck_options" to indicate whether we should
print verbose messages when checking refs consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
fsck.h | 1 +
3 files changed, 58 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable more strict checking, every WARN severity for the `Fsck Messages`
+ be seen as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..599b526786 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("too many arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose_refs = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "skip_oids" set
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.skip_oids);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.h b/fsck.h
index fe5d4d2ad9..ef1f1ed15e 100644
--- a/fsck.h
+++ b/fsck.h
@@ -147,6 +147,7 @@ struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose_refs:1;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
2024-07-10 14:47 ` [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options" shejialuo
@ 2024-07-10 21:31 ` Junio C Hamano
2024-07-11 12:39 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-07-10 21:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine,
Justin Tobler
shejialuo <shejialuo@gmail.com> writes:
> Subject: Re: [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
Just saying
git refs: add verify subcommand
would be clearer. If you really want to talk about two modes, you
could say
git refs: add "verify [--strict|--verbose]" subcommand
but that may be too much.
> Introduce a new subcommand "verify" in git-refs(1) to allow the user to
> check the reference database consistency and also this subcommand will
> be used as the entry point of checking refs for "git-fsck(1)". Last, add
> "verbose_refs" field into "fsck_options" to indicate whether we should
> print verbose messages when checking refs consistency.
Is there a reason why this has to be verbose_refs and not a simple
verbose bit? When people see how it is useful to ask for the
verbose output while checking refs, wouldn't people wish to add the
same "--verbose" support while checking objects, and at that point,
wouldn't it be awkward to add verbose_objs member to the struct and
having to flip both bits on?
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> Documentation/git-refs.txt | 13 +++++++++++
> builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
> fsck.h | 1 +
> 3 files changed, 58 insertions(+)
>
> diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
> index 5b99e04385..1244a85b64 100644
> --- a/Documentation/git-refs.txt
> +++ b/Documentation/git-refs.txt
> @@ -10,6 +10,7 @@ SYNOPSIS
> --------
> [verse]
> 'git refs migrate' --ref-format=<format> [--dry-run]
> +'git refs verify' [--strict] [--verbose]
>
> DESCRIPTION
> -----------
> @@ -22,6 +23,9 @@ COMMANDS
> migrate::
> Migrate ref store between different formats.
>
> +verify::
> + Verify reference database consistency.
> +
The error reporting function for refs consistency check was still
about reporting a problem for a single ref. I am wondering how
consistency violations that are not about a single ref should be
handled. For example, if refs/packed-backend.c:packed_fsck() finds
that the file is not sorted properly or has some unparseable garbage
in it, it is not something you can report as "refs/heads/main is
broken", but those who are interested in seeing the "reference
database consistency" verified, it is very much what they want the
tool to notice. How would detection of such a breakage that is not
attributed to a single ref fit in this "ref consistency check
infrastructure" that was introduced by [05/10]?
> + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> + if (argc)
> + usage(_("too many arguments"));
I do not think we want to change this line in this topic, but
because I noticed that the issue is widespread, let me make a note
here that we may want to clean up all the commands that give this
message as a #leftoverbit item:
$ git cmd foo baz
usage: too many arguments
is very unfriendly in that it is not immediately obvious to users
which arguments are excess. Should they have given "git cmd<RET>"?
Is it "git cmd foo" that does not take any argument?
If you said something like
$ git refs verify baz
error: 'git refs verify' takes no arguments
or even
$ git refs verify baz
error: unknown argument 'baz' given to 'git refs verify'
it would be much more helpful.
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
2024-07-10 21:31 ` Junio C Hamano
@ 2024-07-11 12:39 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-11 12:39 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine,
Justin Tobler
On Wed, Jul 10, 2024 at 02:31:03PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > Subject: Re: [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
>
> Just saying
>
> git refs: add verify subcommand
>
> would be clearer. If you really want to talk about two modes, you
> could say
>
> git refs: add "verify [--strict|--verbose]" subcommand
>
> but that may be too much.
>
Thanks, I will change in the next version.
> > Introduce a new subcommand "verify" in git-refs(1) to allow the user to
> > check the reference database consistency and also this subcommand will
> > be used as the entry point of checking refs for "git-fsck(1)". Last, add
> > "verbose_refs" field into "fsck_options" to indicate whether we should
> > print verbose messages when checking refs consistency.
>
> Is there a reason why this has to be verbose_refs and not a simple
> verbose bit? When people see how it is useful to ask for the
> verbose output while checking refs, wouldn't people wish to add the
> same "--verbose" support while checking objects, and at that point,
> wouldn't it be awkward to add verbose_objs member to the struct and
> having to flip both bits on?
>
Actually, this is really what I thought. I just want to provide more
fine-grained control here. However, when I implemented the code, I also
felt awkward about this. I can't find the balance here.
I will improve this in the next version.
> > Mentored-by: Patrick Steinhardt <ps@pks.im>
> > Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> > Signed-off-by: shejialuo <shejialuo@gmail.com>
> > ---
> > Documentation/git-refs.txt | 13 +++++++++++
> > builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
> > fsck.h | 1 +
> > 3 files changed, 58 insertions(+)
> >
> > diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
> > index 5b99e04385..1244a85b64 100644
> > --- a/Documentation/git-refs.txt
> > +++ b/Documentation/git-refs.txt
> > @@ -10,6 +10,7 @@ SYNOPSIS
> > --------
> > [verse]
> > 'git refs migrate' --ref-format=<format> [--dry-run]
> > +'git refs verify' [--strict] [--verbose]
> >
> > DESCRIPTION
> > -----------
> > @@ -22,6 +23,9 @@ COMMANDS
> > migrate::
> > Migrate ref store between different formats.
> >
> > +verify::
> > + Verify reference database consistency.
> > +
>
> The error reporting function for refs consistency check was still
> about reporting a problem for a single ref. I am wondering how
> consistency violations that are not about a single ref should be
> handled. For example, if refs/packed-backend.c:packed_fsck() finds
> that the file is not sorted properly or has some unparseable garbage
> in it, it is not something you can report as "refs/heads/main is
> broken", but those who are interested in seeing the "reference
> database consistency" verified, it is very much what they want the
> tool to notice. How would detection of such a breakage that is not
> attributed to a single ref fit in this "ref consistency check
> infrastructure" that was introduced by [05/10]?
>
Yes, I didn't consider other cases. Although I have said in the subject
that this series is to set up the infrastructure of fscking refs, it's
a little hard for me to set up a perfect "fsck_refs_report" at the
moment.
As you said, I think I should currently consider the packed-refs
in this series. I will find a way to achieve this in the next version.
Well, I could say I intentionally ignored this problem. But we should
face the problem directly.
Really thanks.
> > + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> > + if (argc)
> > + usage(_("too many arguments"));
>
> I do not think we want to change this line in this topic, but
> because I noticed that the issue is widespread, let me make a note
> here that we may want to clean up all the commands that give this
> message as a #leftoverbit item:
>
> $ git cmd foo baz
> usage: too many arguments
>
> is very unfriendly in that it is not immediately obvious to users
> which arguments are excess. Should they have given "git cmd<RET>"?
> Is it "git cmd foo" that does not take any argument?
>
> If you said something like
>
> $ git refs verify baz
> error: 'git refs verify' takes no arguments
>
> or even
>
> $ git refs verify baz
> error: unknown argument 'baz' given to 'git refs verify'
>
> it would be much more helpful.
>
I will improve this in the next version.
>
> Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 07/10] builtin/fsck: add `git-refs verify` child process
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-07-10 14:47 ` [GSoC][PATCH v10 06/10] builtin/refs: add verify subcommand and verbose_refs for "fsck_options" shejialuo
@ 2024-07-10 14:48 ` shejialuo
2024-07-10 14:48 ` [GSoC][PATCH v10 08/10] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:48 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index de34538c4f..ec3357722c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -897,6 +897,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1066,6 +1081,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 08/10] files-backend: add unified interface for refs scanning
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-07-10 14:48 ` [GSoC][PATCH v10 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-10 14:48 ` shejialuo
2024-07-10 14:48 ` [GSoC][PATCH v10 09/10] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:48 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d89eeda8ef..84acb58782 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,6 +3409,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose_refs)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3415,7 +3488,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 09/10] fsck: add ref name check for files backend
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-07-10 14:48 ` [GSoC][PATCH v10 08/10] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-10 14:48 ` shejialuo
2024-07-10 14:48 ` [GSoC][PATCH v10 10/10] fsck: add ref content " shejialuo
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:48 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully
check refs with badly formatted names, such as a standalone "@" or a
name ending with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name. And add a new unit test to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index ef1f1ed15e..5fcb249735 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 84acb58782..69a76048d3 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3489,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v10 10/10] fsck: add ref content check for files backend
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-07-10 14:48 ` [GSoC][PATCH v10 09/10] fsck: add ref name check for files backend shejialuo
@ 2024-07-10 14:48 ` shejialuo
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-10 14:48 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert the
relative path to an absolute path. Then use "skip_prefix" to make it
align with the symref "referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path is a wrong type in filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index 5fcb249735..3ad2cc86c9 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 69a76048d3..d98ef45403 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For symbolic ref, "pointee_name"
+ * would be the content after "ref:". For symbolic link, "pointee_name" would
+ * be the relative path against "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs could have a trailing garbage. Should
+ * be reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 00/10] ref consistency check infra setup
2024-07-10 14:43 ` [GSoC][PATCH v10 00/10] ref consistency check infra setup shejialuo
` (9 preceding siblings ...)
2024-07-10 14:48 ` [GSoC][PATCH v10 10/10] fsck: add ref content " shejialuo
@ 2024-07-14 12:28 ` shejialuo
2024-07-14 12:30 ` [GSoC][PATCH v11 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
` (10 more replies)
10 siblings, 11 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:28 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version handles the following problems:
1. Reorder the commit message to let "fsck: rename objects-related fsck
error functions" behind the "fsck: add a unified interface for reporting
fsck messages".
2. Rename "vfsck_report" to "fsck_vreport" to align with the
codebase.
3. Make the "git-refs verify" commit message clean, and add
user-friendly error message here.
4. Rename "verbose_refs" to "verbose" field in "fsck_options".
And the most important change in this version is to enhance the
"fsck_refs_report" function. After some investigation, there are the
following situations that we should handle when checking ref
consistency.
1. When checking loose refs and reflogs, we only need the checkee
information, because they are standalone files.
2. When checking packed-refs, we should check the packed-refs itself,
for example whether it is sorted or there are some garbage trailing
contents. However, we should also check each ref (sub_checkee) in the
file.
3. When checking reftable refs, we need to check the binary file;
however, I do not truly understand the principles of reftable refs. But
we could still use the same idea as in case 2.
Based on the above, I changed the "fsck_refs_report" as shown below:
int fsck_refs_error_function(struct fsck_options *options UNUSED,
const struct object_id *oid,
enum object_type object_type UNUSED,
const char *ref_checkee,
const char *sub_ref_checkee,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
struct strbuf sb = STRBUF_INIT;
int ret = 0;
if (sub_ref_checkee)
strbuf_addf(&sb, "%s.%s", ref_checkee, sub_ref_checkee);
else
strbuf_addstr(&sb, ref_checkee);
if (oid)
strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
if (msg_type == FSCK_WARN)
warning("%s: %s", sb.buf, message);
else
ret = error("%s: %s", sb.buf, message);
strbuf_release(&sb);
return ret;
}
It could provide the following report messages:
1. "ref_checkee": "fsck error name": "user message".
2. "ref_checkee.sub_ref_checkee": "fsck error name": "user message".
3. "ref_checkee -> (oid hex)": "fsck error name": "user message".
4. "ref_checkee.sub_ref_checkee -> (oid hex)": "fsck error name": "user
message".
shejialuo (10):
fsck: rename "skiplist" to "skip_oids"
fsck: add a unified interface for reporting fsck messages
fsck: rename objects-related fsck error functions
fsck: add refs-related error report function
refs: set up ref consistency check infrastructure
git refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 33 ++++-
builtin/mktag.c | 2 +
builtin/refs.c | 44 ++++++
fsck.c | 118 +++++++++++++---
fsck.h | 69 ++++++---
object-file.c | 12 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 255 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
16 files changed, 766 insertions(+), 56 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v10:
1: e044f933de = 1: a69705b777 fsck: rename "skiplist" to "skip_oids"
3: df4837e960 ! 2: 1ef1036348 fsck: add a unified interface for reporting fsck messages
@@ Commit message
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
- can report either objects or refs, create a new function "vfsck_report"
- by adding "checked_ref_name" parameter following the "report" prototype.
- Instead of using "...", provide "va_list" to allow more flexibility.
+ can report either objects or refs, create a new function "fsck_vreport"
+ following the "report" prototype. Instead of using "...", provide
+ "va_list" to allow more flexibility.
- Like "report", the "vfsck_report" function will use "error_func"
+ When checking loose refs and reflogs, we only need to pass the checked
+ name to the fsck error report function. However, for packed-refs and
+ reftable refs, we need to check both the consistency of the file itself
+ and the refs or reflogs contained in the file. In order to provide above
+ checks, add two parameters "ref_checkee" and "sub_ref_checkee" in
+ "fsck_vreport" function.
+
+ Like "report", the "fsck_vreport" function will use "error_func"
registered in "fsck_options" to report customized messages. Change
- "error_func" prototype to align with the new "vfsck_report".
+ "error_func" prototype to align with the new "fsck_vreport".
- Then, change "report" function to use "vfsck_report" to report objects
+ Then, change "report" function to use "fsck_vreport" to report objects
related messages. Add a new function called "fsck_refs_report" to use
- "vfsck_report" to report refs related messages.
+ "fsck_vreport" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
@@ Commit message
## builtin/fsck.c ##
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
- static int fsck_objects_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
-+ const char *checked_ref_name UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ static int fsck_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const char *ref_checkee UNUSED,
++ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
## builtin/mktag.c ##
@@ builtin/mktag.c: static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
-+ const char *checked_ref_name UNUSED,
++ const char *ref_checkee UNUSED,
++ const char *sub_ref_checkee UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
@@ fsck.c: static int object_on_skiplist(struct fsck_options *opts,
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
-+static int vfsck_report(struct fsck_options *options,
++static int fsck_vreport(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
-+ const char *checked_ref_name,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
@@ fsck.c: static int report(struct fsck_options *options,
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
-- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
-+ result = options->error_func(options, oid, object_type, checked_ref_name,
+ result = options->error_func(options, oid, object_type,
++ ref_checkee, sub_ref_checkee,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ fsck.c: static int report(struct fsck_options *options,
+{
+ va_list ap;
+ int result;
++
+ va_start(ap, fmt);
-+ result = vfsck_report(options, oid, object_type, NULL,
++ result = fsck_vreport(options, oid, object_type, NULL, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
++
+ return result;
+}
+
-+
-+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
-+ const char *checked_ref_name,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
-+ result = vfsck_report(options, oid, OBJ_NONE,
-+ checked_ref_name, msg_id, fmt, ap);
++ result = fsck_vreport(options, oid, OBJ_NONE, ref_checkee, sub_ref_checkee,
++ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
@@ fsck.c: static int report(struct fsck_options *options,
{
if (!options->object_names)
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
- int fsck_objects_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
-+ const char *checked_ref_name UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ int fsck_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
++ const char *ref_checkee UNUSED,
++ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
- int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
-+ const char *checked_ref_name,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
-@@ fsck.c: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
+@@ fsck.c: int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
puts(oid_to_hex(oid));
return 0;
}
-- return fsck_objects_error_function(o, oid, object_type,
-+ return fsck_objects_error_function(o, oid, object_type, checked_ref_name,
- msg_type, msg_id, message);
+- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
++ return fsck_error_function(o, oid, object_type, ref_checkee,
++ sub_ref_checkee, msg_type, msg_id, message);
}
## fsck.h ##
@@ fsck.h: int is_valid_msg_type(const char *msg_id, const char *msg_type);
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
-+ const char *checked_ref_name,
++ const char *ref_checkee, const char *sub_ref_checkee,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
- int fsck_objects_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
-+ const char *checked_ref_name,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
- int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
-+ const char *checked_ref_name,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+ int fsck_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
++ const char *ref_checkee, const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
@@ fsck.h: int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
@@ fsck.h: int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
+/*
+ * Report an error or warning for refs.
+ */
-+__attribute__((format (printf, 5, 6)))
++__attribute__((format (printf, 6, 7)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
-+ const char *checked_ref_name,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
@@ object-file.c: int repo_has_object_file(struct repository *r,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
-+ const char *ref_checked_name UNUSED,
++ const char *ref_checkee UNUSED,
++ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
2: 73a7c53a23 ! 3: d17cf6166e fsck: rename objects-related fsck error functions
@@ Commit message
fsck: rename objects-related fsck error functions
The names of objects-related fsck error functions are general. It's OK
- when there is only object database check. However, we are going to
- introduce refs database check. In order to avoid ambiguity, rename
- objects-related fsck error functions to explicitly indicate these
+ when there is only object database check. However, we have introduced
+ refs database check report function. To avoid ambiguity, rename
+ object-related fsck error functions to explicitly indicate these
functions are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
+- const char *ref_checkee UNUSED,
+- const char *sub_ref_checkee UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const char *ref_checkee UNUSED,
++ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
+- const char *ref_checkee UNUSED,
+- const char *sub_ref_checkee UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
++ const char *ref_checkee UNUSED,
++ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+- const char *ref_checkee,
+- const char *sub_ref_checkee,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
puts(oid_to_hex(oid));
return 0;
}
-- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
-+ return fsck_objects_error_function(o, oid, object_type,
-+ msg_type, msg_id, message);
+- return fsck_error_function(o, oid, object_type, ref_checkee,
+- sub_ref_checkee, msg_type, msg_id, message);
++ return fsck_objects_error_function(o, oid, object_type, ref_checkee,
++ sub_ref_checkee, msg_type, msg_id,
++ message);
}
## fsck.h ##
@@ fsck.h: typedef int (*fsck_error)(struct fsck_options *o,
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+- const char *ref_checkee, const char *sub_ref_checkee,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+- const char *ref_checkee,
+- const char *sub_ref_checkee,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
++ const char *ref_checkee, const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
4: c8eb01c987 ! 4: ee17c0835b fsck: add refs-related error report function
@@ Metadata
## Commit message ##
fsck: add refs-related error report function
- Add refs-related options to the "fsck_options", create refs-specific
- "error_func" callback "fsck_refs_error_function".
+ Create refs-specific "error_func" callback "fsck_refs_error_function"
+ which could provide the following report messages.
- "fsck_refs_error_function" will use the "oid" parameter. When the caller
+ 1. "ref_checkee": "fsck error name": "user message".
+ 2. "ref_checkee.sub_ref_checkee": "fsck error name": "user message".
+ 3. "ref_checkee -> (oid hex)": "fsck error name": "user message".
+ 4. "ref_checkee.sub_ref_checkee -> (oid hex)": "fsck error name": "user
+ message".
+
+ "fsck_refs_error_function" uses the "ref_checkee" and "sub_ref_checkee"
+ parameters to indicate the information of the checked refs. For loose
+ ref and reflog, it only uses the "ref_checkee" parameter. For packed
+ refs and reftable refs, when checking the consistency of the file
+ itself, it still only uses "ref_checkee" parameter. However, when
+ checking the consistency of the ref or reflog contained in the file, it
+ will use "sub_ref_checkee" parameter to indicate that we are not
+ checking the file but the incorporated ref or reflog.
+
+ "fsck_refs_error_function" will use the "oid" parameter if the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
-+ const char *checked_ref_name,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
-+ strbuf_addstr(&sb, checked_ref_name);
++ if (sub_ref_checkee)
++ strbuf_addf(&sb, "%s.%s", ref_checkee, sub_ref_checkee);
++ else
++ strbuf_addstr(&sb, ref_checkee);
++
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
-+ const char *checked_ref_name,
++ const char *ref_checkee,
++ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
5: e4085df496 = 5: 4718ba7ddc refs: set up ref consistency check infrastructure
6: 497f224bed ! 6: 76163fb5d9 builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
@@ Metadata
Author: shejialuo <shejialuo@gmail.com>
## Commit message ##
- builtin/refs: add verify subcommand and verbose_refs for "fsck_options"
+ git refs: add verify subcommand
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency and also this subcommand will
be used as the entry point of checking refs for "git-fsck(1)". Last, add
- "verbose_refs" field into "fsck_options" to indicate whether we should
- print verbose messages when checking refs consistency.
+ "verbose" field into "fsck_options" to indicate whether we should print
+ verbose messages when checking refs and objects consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
-+ usage(_("too many arguments"));
++ usage(_("'git refs verify' takes no arguments"));
+
+ if (verbose)
-+ fsck_refs_options.verbose_refs = 1;
++ fsck_refs_options.verbose = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
@@ fsck.h: struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
-+ unsigned verbose_refs:1;
++ unsigned verbose:1;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
7: 86a14c7b43 = 7: 27f766fb8e builtin/fsck: add `git-refs verify` child process
8: daedb80b47 ! 8: e2ab45ec9f files-backend: add unified interface for refs scanning
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
-+ if (o->verbose_refs)
++ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ NULL
+ };
+
-+ if (o->verbose_refs)
++ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
9: c36d588e4f ! 9: cd438fb56d fsck: add ref name check for files backend
@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
-+ ret = fsck_refs_report(o, NULL, sb.buf,
++ ret = fsck_refs_report(o, NULL, sb.buf, NULL,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
10: 521e0d9ca3 ! 10: 15662c6934 fsck: add ref content check for files backend
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
-+ ret = fsck_refs_report(o, NULL, refname,
++ ret = fsck_refs_report(o, NULL, refname, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
-+ ret = fsck_refs_report(o, NULL, refname,
++ ret = fsck_refs_report(o, NULL, refname, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
-+ ret = fsck_refs_report(o, NULL, refname,
++ ret = fsck_refs_report(o, NULL, refname, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
-+ ret = fsck_refs_report(o, NULL, refname.buf,
++ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
-+ ret = fsck_refs_report(o, NULL, refname.buf,
++ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ * be reported as a warning.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
-+ ret = fsck_refs_report(o, NULL, refname.buf,
++ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 01/10] fsck: rename "skiplist" to "skip_oids"
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
@ 2024-07-14 12:30 ` shejialuo
2024-07-14 12:31 ` [GSoC][PATCH v11 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
` (9 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:30 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency check, the "skiplist" name is too
general which will make the caller think "skiplist" is related to both
the refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
2024-07-14 12:30 ` [GSoC][PATCH v11 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-07-14 12:31 ` shejialuo
2024-07-18 13:26 ` Karthik Nayak
2024-07-14 12:31 ` [GSoC][PATCH v11 03/10] fsck: rename objects-related fsck error functions shejialuo
` (8 subsequent siblings)
10 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking fsck
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
can report either objects or refs, create a new function "fsck_vreport"
following the "report" prototype. Instead of using "...", provide
"va_list" to allow more flexibility.
When checking loose refs and reflogs, we only need to pass the checked
name to the fsck error report function. However, for packed-refs and
reftable refs, we need to check both the consistency of the file itself
and the refs or reflogs contained in the file. In order to provide above
checks, add two parameters "ref_checkee" and "sub_ref_checkee" in
"fsck_vreport" function.
Like "report", the "fsck_vreport" function will use "error_func"
registered in "fsck_options" to report customized messages. Change
"error_func" prototype to align with the new "fsck_vreport".
Then, change "report" function to use "fsck_vreport" to report objects
related messages. Add a new function called "fsck_refs_report" to use
"fsck_vreport" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 2 ++
builtin/mktag.c | 2 ++
fsck.c | 60 ++++++++++++++++++++++++++++++++++++++++++-------
fsck.h | 19 +++++++++++++++-
object-file.c | 12 +++++-----
5 files changed, 81 insertions(+), 14 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..8aeb8b17e2 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -92,6 +92,8 @@ static int objerror(struct object *obj, const char *err)
static int fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid,
enum object_type object_type,
+ const char *ref_checkee UNUSED,
+ const char *sub_ref_checkee UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..b5f9e108e5 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,8 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const char *ref_checkee UNUSED,
+ const char *sub_ref_checkee UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 3f32441492..7fceecdfae 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,19 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,9 +257,10 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
result = options->error_func(options, oid, object_type,
+ ref_checkee, sub_ref_checkee,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -260,6 +268,37 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+
+ va_start(ap, fmt);
+ result = fsck_vreport(options, oid, object_type, NULL, NULL,
+ msg_id, fmt, ap);
+ va_end(ap);
+
+ return result;
+}
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_vreport(options, oid, OBJ_NONE, ref_checkee, sub_ref_checkee,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1203,6 +1242,8 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
int fsck_error_function(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type UNUSED,
+ const char *ref_checkee UNUSED,
+ const char *sub_ref_checkee UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
@@ -1306,6 +1347,8 @@ int git_fsck_config(const char *var, const char *value,
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
@@ -1314,5 +1357,6 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_error_function(o, oid, object_type, ref_checkee,
+ sub_ref_checkee, msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..61ca38afd6 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,19 +114,25 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *ref_checkee, const char *sub_ref_checkee,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_error_function(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const char *ref_checkee, const char *sub_ref_checkee,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
@@ -209,6 +215,17 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 6, 7)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 065103be3e..bc63b80c48 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,13 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const char *ref_checkee UNUSED,
+ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v11 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-14 12:31 ` [GSoC][PATCH v11 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-18 13:26 ` Karthik Nayak
2024-07-20 7:24 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Karthik Nayak @ 2024-07-18 13:26 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 4490 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> The static function "report" provided by "fsck.c" aims at checking fsck
> error type and calling the callback "error_func" to report the message.
> However, "report" function is only related to object database which
> cannot be reused for refs. In order to provide a unified interface which
> can report either objects or refs, create a new function "fsck_vreport"
> following the "report" prototype. Instead of using "...", provide
> "va_list" to allow more flexibility.
>
> When checking loose refs and reflogs, we only need to pass the checked
> name to the fsck error report function. However, for packed-refs and
> reftable refs, we need to check both the consistency of the file itself
> and the refs or reflogs contained in the file. In order to provide above
> checks, add two parameters "ref_checkee" and "sub_ref_checkee" in
> "fsck_vreport" function.
Nit: It would be nice if you described here the expected usage
of "ref_checkee" and "sub_ref_checkee".
[snip]
> diff --git a/fsck.h b/fsck.h
> index bcfb2e34cd..61ca38afd6 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -114,19 +114,25 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
> typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
> void *data, struct fsck_options *options);
>
> -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> +/*
> + * callback function for reporting errors when checking either objects or refs
> + */
> typedef int (*fsck_error)(struct fsck_options *o,
> const struct object_id *oid, enum object_type object_type,
> + const char *ref_checkee, const char *sub_ref_checkee,
This makes me really wonder if this is the best way we can do this. This
seems to solve for the current situation, but what happens if you want
to also add the reftable size or packed-refs size here? Would you
introduce another field?
Would it be better to add a single `const struct fsck_refs_info *`
instead?
Perhaps something like:
struct fsck_refs_info {
char *refname;
union {
struct {
...
} reftable;
struct {
...
} files;
} u;
}
Of course we can fill in the details as we need them.
> enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> const char *message);
>
> int fsck_error_function(struct fsck_options *o,
> const struct object_id *oid, enum object_type object_type,
> + const char *ref_checkee, const char *sub_ref_checkee,
> enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> const char *message);
> int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
> const struct object_id *oid,
> enum object_type object_type,
> + const char *ref_checkee,
> + const char *sub_ref_checkee,
> enum fsck_msg_type msg_type,
> enum fsck_msg_id msg_id,
> const char *message);
> @@ -209,6 +215,17 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
> */
> int fsck_finish(struct fsck_options *options);
>
> +/*
> + * Report an error or warning for refs.
> + */
> +__attribute__((format (printf, 6, 7)))
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const char *ref_checkee,
> + const char *sub_ref_checkee,
> + enum fsck_msg_id msg_id,
> + const char *fmt, ...);
> +
> /*
> * Subsystem for storing human-readable names for each object.
> *
> diff --git a/object-file.c b/object-file.c
> index 065103be3e..bc63b80c48 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2470,11 +2470,13 @@ int repo_has_object_file(struct repository *r,
> * give more context.
> */
> static int hash_format_check_report(struct fsck_options *opts UNUSED,
> - const struct object_id *oid UNUSED,
> - enum object_type object_type UNUSED,
> - enum fsck_msg_type msg_type UNUSED,
> - enum fsck_msg_id msg_id UNUSED,
> - const char *message)
> + const struct object_id *oid UNUSED,
> + enum object_type object_type UNUSED,
> + const char *ref_checkee UNUSED,
> + const char *sub_ref_checkee UNUSED,
> + enum fsck_msg_type msg_type UNUSED,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> {
> error(_("object fails fsck: %s"), message);
> return 1;
> --
> 2.45.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v11 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-18 13:26 ` Karthik Nayak
@ 2024-07-20 7:24 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 7:24 UTC (permalink / raw)
To: Karthik Nayak
Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine,
Justin Tobler
On Thu, Jul 18, 2024 at 06:26:30AM -0700, Karthik Nayak wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > diff --git a/fsck.h b/fsck.h
> > index bcfb2e34cd..61ca38afd6 100644
> > --- a/fsck.h
> > +++ b/fsck.h
> > @@ -114,19 +114,25 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
> > typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
> > void *data, struct fsck_options *options);
> >
> > -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> > +/*
> > + * callback function for reporting errors when checking either objects or refs
> > + */
> > typedef int (*fsck_error)(struct fsck_options *o,
> > const struct object_id *oid, enum object_type object_type,
> > + const char *ref_checkee, const char *sub_ref_checkee,
>
> This makes me really wonder if this is the best way we can do this? This
> seems to solve for the current situation, but what happens if you want
> to also adding the reftable size or packed-refs size here? Would you
> introduce another field?
>
> would it be better to add a single `const struct *fsck_refs_info`
> instead?
>
> Perhaps something like:
>
> struct fsck_refs_info {
> char *refname;
> union {
> struct {
> ...
> } reftable;
> struct {
> ...
> } files;
> } u;
> }
>
> Of course we can fill in the details as we need them.
>
I agree, we should design an extensible data structure here. I will use
this idea, because we don't know at the current time what we will need.
However, I think "refname" is not a good name; instead I have decided to
use "ref_checkee". "refname" may let the caller think we only check the
refname, but we also need to check reflogs.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 03/10] fsck: rename objects-related fsck error functions
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
2024-07-14 12:30 ` [GSoC][PATCH v11 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-07-14 12:31 ` [GSoC][PATCH v11 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-14 12:31 ` shejialuo
2024-07-14 12:31 ` [GSoC][PATCH v11 04/10] fsck: add refs-related error report function shejialuo
` (7 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of the objects-related fsck error functions are generic. That
was fine when there was only the object database check. However, we have
introduced a report function for the refs database check. To avoid
ambiguity, rename the objects-related fsck error functions to explicitly
indicate that these functions are used to report objects-related
messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 18 +++++++++---------
fsck.c | 37 +++++++++++++++++++------------------
fsck.h | 32 ++++++++++++++++----------------
3 files changed, 44 insertions(+), 43 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 8aeb8b17e2..d22488c5d0 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,14 +89,14 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- const char *ref_checkee UNUSED,
- const char *sub_ref_checkee UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *ref_checkee UNUSED,
+ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -940,7 +940,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index 7fceecdfae..d66ea4ef44 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1239,14 +1239,14 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- const char *ref_checkee UNUSED,
- const char *sub_ref_checkee UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *ref_checkee UNUSED,
+ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1344,19 +1344,20 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- const char *ref_checkee,
- const char *sub_ref_checkee,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, ref_checkee,
- sub_ref_checkee, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type, ref_checkee,
+ sub_ref_checkee, msg_type, msg_id,
+ message);
}
diff --git a/fsck.h b/fsck.h
index 61ca38afd6..0b40d9ec28 100644
--- a/fsck.h
+++ b/fsck.h
@@ -123,19 +123,19 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- const char *ref_checkee, const char *sub_ref_checkee,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- const char *ref_checkee,
- const char *sub_ref_checkee,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const char *ref_checkee, const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -156,7 +156,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -164,7 +164,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -172,7 +172,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 04/10] fsck: add refs-related error report function
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-07-14 12:31 ` [GSoC][PATCH v11 03/10] fsck: rename objects-related fsck error functions shejialuo
@ 2024-07-14 12:31 ` shejialuo
2024-07-14 12:31 ` [GSoC][PATCH v11 05/10] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Create a refs-specific "error_func" callback "fsck_refs_error_function"
which can provide the following report messages.
1. "ref_checkee": "fsck error name": "user message".
2. "ref_checkee.sub_ref_checkee": "fsck error name": "user message".
3. "ref_checkee -> (oid hex)": "fsck error name": "user message".
4. "ref_checkee.sub_ref_checkee -> (oid hex)": "fsck error name": "user
message".
"fsck_refs_error_function" uses the "ref_checkee" and "sub_ref_checkee"
parameters to indicate the information of the checked refs. For loose
ref and reflog, it only uses the "ref_checkee" parameter. For packed
refs and reftable refs, when checking the consistency of the file
itself, it still only uses "ref_checkee" parameter. However, when
checking the consistency of the ref or reflog contained in the file, it
will use "sub_ref_checkee" parameter to indicate that we are not
checking the file but the incorporated ref or reflog.
If the caller passes the "oid" parameter, "fsck_refs_error_function"
will use "oid_to_hex" to get the corresponding hex value and include it
in the reported message.
Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 29 +++++++++++++++++++++++++++++
fsck.h | 15 +++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/fsck.c b/fsck.c
index d66ea4ef44..4d18f20b09 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1256,6 +1256,35 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (sub_ref_checkee)
+ strbuf_addf(&sb, "%s.%s", ref_checkee, sub_ref_checkee);
+ else
+ strbuf_addstr(&sb, ref_checkee);
+
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 0b40d9ec28..7ae640ac6c 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,6 +136,14 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const char *ref_checkee,
+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -174,6 +182,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 05/10] refs: set up ref consistency check infrastructure
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-07-14 12:31 ` [GSoC][PATCH v11 04/10] fsck: add refs-related error report function shejialuo
@ 2024-07-14 12:31 ` shejialuo
2024-07-14 12:31 ` [GSoC][PATCH v11 06/10] git refs: add verify subcommand shejialuo
` (5 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The interfaces defined in `ref_storage_be` are carefully grouped by
their semantics. They are organized into the following five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To keep consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". Its semantics do not fit into
any of the above five categories, so explicitly add a blank line to set
it apart from the others.
Lastly, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..d89eeda8ef 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3445,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 06/10] git refs: add verify subcommand
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-07-14 12:31 ` [GSoC][PATCH v11 05/10] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-14 12:31 ` shejialuo
2024-07-14 12:32 ` [GSoC][PATCH v11 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency. This subcommand will also be
used as the entry point for checking refs from git-fsck(1). Lastly, add
a "verbose" field to "fsck_options" to indicate whether we should print
verbose messages when checking refs and objects consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
fsck.h | 1 +
3 files changed, 58 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable more strict checking, every WARN severity for the `Fsck Messages`
+ be seen as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..4831c9e28e 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "skip_oids" set
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.skip_oids);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.h b/fsck.h
index 7ae640ac6c..6803696f68 100644
--- a/fsck.h
+++ b/fsck.h
@@ -149,6 +149,7 @@ struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose:1;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 07/10] builtin/fsck: add `git-refs verify` child process
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-07-14 12:31 ` [GSoC][PATCH v11 06/10] git refs: add verify subcommand shejialuo
@ 2024-07-14 12:32 ` shejialuo
2024-07-14 12:32 ` [GSoC][PATCH v11 08/10] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:32 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d22488c5d0..d3b466b84e 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -898,6 +898,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1067,6 +1082,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 08/10] files-backend: add unified interface for refs scanning
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-07-14 12:32 ` [GSoC][PATCH v11 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-14 12:32 ` shejialuo
2024-07-14 12:32 ` [GSoC][PATCH v11 09/10] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:32 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for the files
backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d89eeda8ef..794e9f3f2e 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,6 +3409,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3415,7 +3488,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 09/10] fsck: add ref name check for files backend
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-07-14 12:32 ` [GSoC][PATCH v11 08/10] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-14 12:32 ` shejialuo
2024-07-14 12:32 ` [GSoC][PATCH v11 10/10] fsck: add ref content " shejialuo
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:32 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully check
refs with badly formatted names, such as a standalone "@" or a name
ending with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new test script to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 20 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 125 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index 6803696f68..2a2441e147 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 794e9f3f2e..d20e149214 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,25 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ ret = fsck_refs_report(o, NULL, sb.buf, NULL,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3489,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v11 10/10] fsck: add ref content check for files backend
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-07-14 12:32 ` [GSoC][PATCH v11 09/10] fsck: add ref name check for files backend shejialuo
@ 2024-07-14 12:32 ` shejialuo
2024-07-18 14:31 ` Karthik Nayak
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
10 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-14 12:32 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert a
relative path to an absolute path. Then use "skip_prefix" to strip the
absolute gitdir prefix so that the result aligns with the symref
"referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path is a wrong type in filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 269 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index 2a2441e147..e92a5844ae 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d20e149214..42d2f676b9 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For a symbolic ref, "pointee_name"
+ * would be the content after "ref:". For a symbolic link, "pointee_name" would
+ * be the path relative to "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, refname, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, refname, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, refname, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ /*
+ * Only regular refs can have trailing garbage; it is
+ * reported as a warning by default.
+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v11 10/10] fsck: add ref content check for files backend
2024-07-14 12:32 ` [GSoC][PATCH v11 10/10] fsck: add ref content " shejialuo
@ 2024-07-18 14:31 ` Karthik Nayak
2024-07-18 16:03 ` Junio C Hamano
2024-07-20 7:16 ` shejialuo
0 siblings, 2 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-07-18 14:31 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 14045 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> Enhance the git-fsck(1) command by adding a check for reference content
> in the files backend. The new functionality ensures that symrefs, real
> symbolic link and regular refs are validated correctly.
>
> In order to check the trailing content of the regular refs, add a new
> parameter `trailing` to `parse_loose_ref_contents`.
>
> For symrefs, `parse_loose_ref_contents` will set the "referent".
> However, symbolic link could be either absolute or relative. Use
> "strbuf_add_real_path" to read the symbolic link and convert the
> relative path to absolute path. Then use "skip_prefix" to make it align
> with symref "referent".
>
> Thus, the symrefs and symbolic links could share the same interface. Add
> a new function "files_fsck_symref_target" which aims at checking the
> following things:
>
> 1. whether the pointee is under the `refs/` directory.
> 2. whether the pointee name is correct.
> 3. whether the pointee path is a wrong type in filesystem.
>
> Last, add the following FSCK MESSAGEs:
>
> 1. "badRefContent(ERROR)": A ref has a bad content
> 2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
> 3. "trailingRefContent(WARN)": A ref content has trailing contents.
>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> Documentation/fsck-msgids.txt | 9 +++
> fsck.h | 3 +
> refs.c | 2 +-
> refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++-
> refs/refs-internal.h | 5 +-
> t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
> 6 files changed, 269 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
> index dab4012246..b1630a478b 100644
> --- a/Documentation/fsck-msgids.txt
> +++ b/Documentation/fsck-msgids.txt
> @@ -19,9 +19,15 @@
> `badParentSha1`::
> (ERROR) A commit object has a bad parent sha1.
>
> +`badRefContent`::
> + (ERROR) A ref has a bad content.
> +
> `badRefName`::
> (ERROR) A ref has a bad name.
>
> +`badSymrefPointee`::
> + (ERROR) The pointee of a symref is bad.
> +
> `badTagName`::
> (INFO) A tag has an invalid format.
>
> @@ -167,6 +173,9 @@
> `nullSha1`::
> (WARN) Tree contains entries pointing to a null sha1.
>
> +`trailingRefContent`::
> + (WARN) A ref content has trailing contents.
> +
> `treeNotSorted`::
> (ERROR) A tree is not properly sorted.
>
> diff --git a/fsck.h b/fsck.h
> index 2a2441e147..e92a5844ae 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -32,6 +32,8 @@ enum fsck_msg_type {
> FUNC(BAD_OBJECT_SHA1, ERROR) \
> FUNC(BAD_PARENT_SHA1, ERROR) \
> FUNC(BAD_REF_NAME, ERROR) \
> + FUNC(BAD_REF_CONTENT, ERROR) \
> + FUNC(BAD_SYMREF_POINTEE, ERROR) \
> FUNC(BAD_TIMEZONE, ERROR) \
> FUNC(BAD_TREE, ERROR) \
> FUNC(BAD_TREE_SHA1, ERROR) \
> @@ -72,6 +74,7 @@ enum fsck_msg_type {
> FUNC(HAS_DOTDOT, WARN) \
> FUNC(HAS_DOTGIT, WARN) \
> FUNC(NULL_SHA1, WARN) \
> + FUNC(TRAILING_REF_CONTENT, WARN) \
> FUNC(ZERO_PADDED_FILEMODE, WARN) \
> FUNC(NUL_IN_COMMIT, WARN) \
> FUNC(LARGE_PATHNAME, WARN) \
> diff --git a/refs.c b/refs.c
> index 410919246b..eb82fb7d4e 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
> }
>
> result = parse_loose_ref_contents(content.buf, oid, referent, type,
> - failure_errno);
> + failure_errno, NULL);
>
> done:
> strbuf_release(&full_path);
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index d20e149214..42d2f676b9 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -1,6 +1,7 @@
> #define USE_THE_REPOSITORY_VARIABLE
>
> #include "../git-compat-util.h"
> +#include "../abspath.h"
> #include "../copy.h"
> #include "../environment.h"
> #include "../gettext.h"
> @@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
> strbuf_rtrim(&sb_contents);
> buf = sb_contents.buf;
>
> - ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
> + ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
>
> out:
> if (ret && !myerr)
> @@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
>
> int parse_loose_ref_contents(const char *buf, struct object_id *oid,
> struct strbuf *referent, unsigned int *type,
> - int *failure_errno)
> + int *failure_errno, const char **trailing)
> {
> const char *p;
> if (skip_prefix(buf, "ref:", &buf)) {
> @@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
> *failure_errno = EINVAL;
> return -1;
> }
> +
> + if (trailing)
> + *trailing = p;
> +
> return 0;
> }
>
> @@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o,
> return ret;
> }
>
> +/*
> + * Check the symref "pointee_name" and "pointee_path". The caller should
> + * make sure that "pointee_path" is absolute. For symbolic ref, "pointee_name"
> + * would be the content after "refs:". For symblic link, "pointee_name" would
> + * be the relative path agaignst "gitdir".
> + */
> +static int files_fsck_symref_target(struct fsck_options *o,
> + const char *refname,
> + const char *pointee_name,
> + const char *pointee_path)
> +{
> + const char *p = NULL;
> + struct stat st;
> + int ret = 0;
> +
> + if (!skip_prefix(pointee_name, "refs/", &p)) {
> +
> + ret = fsck_refs_report(o, NULL, refname, NULL,
> + FSCK_MSG_BAD_SYMREF_POINTEE,
> + "point to target out of refs hierarchy");
> + goto out;
> + }
> +
> + if (check_refname_format(pointee_name, 0)) {
> + ret = fsck_refs_report(o, NULL, refname, NULL,
> + FSCK_MSG_BAD_SYMREF_POINTEE,
> + "point to invalid refname");
> + }
> +
> + if (lstat(pointee_path, &st) < 0)
> + goto out;
> +
> + if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
> + ret = fsck_refs_report(o, NULL, refname, NULL,
> + FSCK_MSG_BAD_SYMREF_POINTEE,
> + "point to invalid target");
> + goto out;
> + }
> +out:
> + return ret;
> +}
> +
> +static int files_fsck_refs_content(struct fsck_options *o,
> + const char *gitdir,
> + const char *refs_check_dir,
> + struct dir_iterator *iter)
> +{
> + struct strbuf pointee_path = STRBUF_INIT,
> + ref_content = STRBUF_INIT,
> + abs_gitdir = STRBUF_INIT,
> + referent = STRBUF_INIT,
> + refname = STRBUF_INIT;
> + const char *trailing = NULL;
> + int failure_errno = 0;
> + unsigned int type = 0;
> + struct object_id oid;
> + int ret = 0;
> +
> + strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
> +
> + /*
> + * If the file is a symlink, we need to only check the connectivity
> + * of the destination object.
> + */
> + if (S_ISLNK(iter->st.st_mode)) {
> + const char *pointee_name = NULL;
> +
> + strbuf_add_real_path(&pointee_path, iter->path.buf);
> +
> + strbuf_add_absolute_path(&abs_gitdir, gitdir);
> + strbuf_normalize_path(&abs_gitdir);
> + if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
> + strbuf_addch(&abs_gitdir, '/');
> +
> + if (!skip_prefix(pointee_path.buf,
> + abs_gitdir.buf, &pointee_name)) {
> + ret = fsck_refs_report(o, NULL, refname.buf, NULL,
> + FSCK_MSG_BAD_SYMREF_POINTEE,
> + "point to target outside gitdir");
> + goto clean;
> + }
> +
> + ret = files_fsck_symref_target(o, refname.buf, pointee_name,
> + pointee_path.buf);
> + goto clean;
> + }
> +
> + if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
> + ret = error_errno(_("%s/%s: unable to read the ref"),
> + refs_check_dir, iter->relative_path);
> + goto clean;
> + }
> +
> + if (parse_loose_ref_contents(ref_content.buf, &oid,
> + &referent, &type,
> + &failure_errno, &trailing)) {
> + ret = fsck_refs_report(o, NULL, refname.buf, NULL,
> + FSCK_MSG_BAD_REF_CONTENT,
> + "invalid ref content");
> + goto clean;
> + }
> +
> + /*
> + * If the ref is a symref, we need to check the destination name and
> + * connectivity.
> + */
> + if (referent.len && (type & REF_ISSYMREF)) {
> + strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
> + strbuf_rtrim(&referent);
> +
> + ret = files_fsck_symref_target(o, refname.buf, referent.buf,
> + pointee_path.buf);
> + goto clean;
> + } else {
> + /*
> + * Only regular refs could have a trailing garbage. Should
> + * be reported as a warning.
> + */
What happens if a symbolic reference has trailing garbage ?
> + if (trailing && (*trailing != '\0' && *trailing != '\n')) {
> + ret = fsck_refs_report(o, NULL, refname.buf, NULL,
> + FSCK_MSG_TRAILING_REF_CONTENT,
> + "trailing garbage in ref");
> + goto clean;
> + }
> + }
> +
> +clean:
> + strbuf_release(&abs_gitdir);
> + strbuf_release(&pointee_path);
> + strbuf_release(&refname);
> + strbuf_release(&ref_content);
> + strbuf_release(&referent);
> + return ret;
> +}
> +
> static int files_fsck_refs_dir(struct ref_store *ref_store,
> struct fsck_options *o,
> const char *refs_check_dir,
> @@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
> int ret;
> files_fsck_refs_fn fsck_refs_fns[]= {
> files_fsck_refs_name,
> + files_fsck_refs_content,
> NULL
> };
>
> diff --git a/refs/refs-internal.h b/refs/refs-internal.h
> index a905e187cd..2fabf41d14 100644
> --- a/refs/refs-internal.h
> +++ b/refs/refs-internal.h
> @@ -709,11 +709,12 @@ struct ref_store {
>
> /*
> * Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
> - * invalid contents.
> + * invalid contents. Also *trailing is set to the first character after the
> + * refname or NULL if the referent is not empty.
> */
> int parse_loose_ref_contents(const char *buf, struct object_id *oid,
> struct strbuf *referent, unsigned int *type,
> - int *failure_errno);
> + int *failure_errno, const char **trailing);
>
> /*
> * Fill in the generic part of refs and add it to our collection of
> diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
> index b2db58d2c6..35bf40ee64 100755
> --- a/t/t0602-reffiles-fsck.sh
> +++ b/t/t0602-reffiles-fsck.sh
> @@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
> )
> '
>
> +test_expect_success 'regular ref content should be checked' '
> + test_when_finished "rm -rf repo" &&
> + git init repo &&
> + branch_dir_prefix=.git/refs/heads &&
> + tag_dir_prefix=.git/refs/tags &&
> + (
> + cd repo &&
> + git commit --allow-empty -m initial &&
> + git checkout -b branch-1 &&
> + git tag tag-1 &&
> + git commit --allow-empty -m second &&
> + git checkout -b branch-2 &&
> + git tag tag-2 &&
> + git checkout -b a/b/tag-2
> + ) &&
> + (
> + cd repo &&
> + printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
> + git fsck 2>err &&
> + cat >expect <<-EOF &&
> + warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
> + EOF
> + rm $branch_dir_prefix/branch-1-garbage &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
> + test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
> + EOF
> + rm $tag_dir_prefix/tag-1-garbage &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
> + git fsck 2>err &&
> + cat >expect <<-EOF &&
> + warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
> + EOF
> + rm $tag_dir_prefix/tag-2-garbage &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
> + test_must_fail git refs verify 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/tags/tag-2-bad: badRefContent: invalid ref content
> + EOF
> + rm $tag_dir_prefix/tag-2-bad &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
> + test_must_fail git refs verify 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
> + EOF
> + rm $branch_dir_prefix/a/b/branch-2-bad &&
> + test_cmp expect err
> + )
> +'
> +
> +test_expect_success 'symbolic ref content should be checked' '
> + test_when_finished "rm -rf repo" &&
> + git init repo &&
> + branch_dir_prefix=.git/refs/heads &&
> + tag_dir_prefix=.git/refs/tags &&
> + (
> + cd repo &&
> + git commit --allow-empty -m initial &&
> + git checkout -b branch-1 &&
> + git tag tag-1
> + ) &&
> + (
> + cd repo &&
> + printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
> + test_must_fail git refs verify 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
> + EOF
> + rm $branch_dir_prefix/branch-2-bad &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
> + test_must_fail git refs verify 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
> + EOF
> + rm $branch_dir_prefix/branch-2-bad &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
> + test_must_fail git refs verify 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
> + EOF
> + rm $branch_dir_prefix/branch-2-bad &&
> + test_cmp expect err
> + )
> +'
> +
> test_done
> --
> 2.45.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v11 10/10] fsck: add ref content check for files backend
2024-07-18 14:31 ` Karthik Nayak
@ 2024-07-18 16:03 ` Junio C Hamano
2024-07-19 8:33 ` Karthik Nayak
2024-07-20 7:16 ` shejialuo
1 sibling, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-07-18 16:03 UTC (permalink / raw)
To: Karthik Nayak
Cc: shejialuo, git, Patrick Steinhardt, Eric Sunshine, Justin Tobler
Karthik Nayak <karthik.188@gmail.com> writes:
> shejialuo <shejialuo@gmail.com> writes:
> ... 260+ lines of the original removed ...
>> + */
>
> What happens if a symbolic reference has trailing garbage ?
>
> ... 160+ lines of the original removed ...
It is a pain to have to look for only a single line with a new piece
of information in a 400+ line response, more than 99% of which are
quoted original. Can you trim your quote a bit better?
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v11 10/10] fsck: add ref content check for files backend
2024-07-18 14:31 ` Karthik Nayak
2024-07-18 16:03 ` Junio C Hamano
@ 2024-07-20 7:16 ` shejialuo
2024-07-20 8:43 ` shejialuo
1 sibling, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-20 7:16 UTC (permalink / raw)
To: Karthik Nayak
Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine,
Justin Tobler
Karthik Nayak <karthik.188@gmail.com> writes:
>
> shejialuo <shejialuo@gmail.com> writes:
> > + /*
> > + * If the ref is a symref, we need to check the destination name and
> > + * connectivity.
> > + */
> > + if (referent.len && (type & REF_ISSYMREF)) {
> > + strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
> > + strbuf_rtrim(&referent);
> > +
> > + ret = files_fsck_symref_target(o, refname.buf, referent.buf,
> > + pointee_path.buf);
> > + goto clean;
> > + } else {
> > + /*
> > + * Only regular refs could have a trailing garbage. Should
> > + * be reported as a warning.
> > + */
>
> What happens if a symbolic reference has trailing garbage ?
>
The "parse_loose_ref_contents" will return the referent. In this function,
it will skip the prefix "ref:" to get the pointee. If there are some trailing
garbage, it will be reported by the "files_fsck_symref_target".
"files_fsck_symref_target" will use "check_refname_format" function
to check the pointee. For example, if the content is "ref: refs/heads/
master garbage". The "refs/heads/master garbage" is a bad name.
However, in my design, the trailing spaces or newline will be ignored,
I thought we may not report this problem. And I use "strbuf_rtrim" here
to ignore spaces and newlines.
And I think there are some differences between symbolic refs and
regular refs when parsing. For regular refs, git will ignore any trailing
garbage, however for symbolic refs, git will only ignore the newlines
and spaces garbage. And git will not parse "refs/heads/master gar",
it's an error here. But for regular refs, for example "edaca... garbage",
git will parse it normally without any warnings.
So question comes here, should we warn the user about the trailing
newlines or spaces. When using "git symbolic-ref refs/heads/maint
refs/heads/main", the "refs/heads/maint" will contain the newline
'\n' here and git also accepts content without newline '\n'. And I
think we should not warn the user about one newline or no newline.
In my opinion, we should do this. It's not hard to do that. We only
warn the user for the following two situations:
1. two or more newlines.
2. one or more spaces.
I will improve code in the next version.
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v11 10/10] fsck: add ref content check for files backend
2024-07-20 7:16 ` shejialuo
@ 2024-07-20 8:43 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 8:43 UTC (permalink / raw)
To: Karthik Nayak
Cc: git, Patrick Steinhardt, Junio C Hamano, Eric Sunshine,
Justin Tobler
>
> The "parse_loose_ref_contents" will return the referent. In this function,
> it will skip the prefix "ref:" to get the pointee. If there are some trailing
> garbage, it will be reported by the "files_fsck_symref_target".
>
> "files_fsck_symref_target" will use "check_refname_format" function
> to check the pointee. For example, if the content is "ref: refs/heads/
> master garbage". The "refs/heads/master garbage" is a bad name.
>
> However, in my design, the trailing spaces or newline will be ignored,
> I thought we may not report this problem. And I use "strbuf_rtrim" here
> to ignore spaces and newlines.
>
> And I think there are some differences between symbolic refs and
> regular refs when parsing. For regular refs, git will ignore any trailing
> garbage, however for symbolic refs, git will only ignore the newlines
> and spaces garbage. And git will not parse "refs/heads/master gar",
> it's an error here. But for regular refs, for example "edaca... garbage",
> git will parse it normally without any warnings.
>
> So question comes here, should we warn the user about the trailing
> newlines or spaces. When using "git symbolic-ref refs/heads/maint
> refs/heads/main", the "refs/heads/maint" will contain the newline
> '\n' here and git also accepts content without newline '\n'. And I
> think we should not warn the user about one newline or no newline.
>
> In my opinion, we should do this. It's not hard to do that. We only
> warn the user for the following two situations:
>
> 1. two or more newlines.
> 2. one or more spaces.
>
> I will improve code in the next version.
>
Well, I have changed my mind here. Sorry for that. If we report this as
a warning, what is the difference between the following two cases?
1. "ref: refs/heads/master garbage "
2. "ref: refs/heads/master garbage"
For 1, if we do this, we will first report it has the extra spaces and then
report it's a bad refname. For 2, we will report it's a bad refname. I think
we should not report extra spaces or newlines here even with the error
type "warning".
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 00/10] ref consistency check infra setup
2024-07-14 12:28 ` [GSoC][PATCH v11 00/10] ref consistency check infra setup shejialuo
` (9 preceding siblings ...)
2024-07-14 12:32 ` [GSoC][PATCH v11 10/10] fsck: add ref content " shejialuo
@ 2024-07-20 9:25 ` shejialuo
2024-07-20 9:27 ` [GSoC][PATCH v12 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
` (10 more replies)
10 siblings, 11 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:25 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version handles the following problem:
In order to provide more extensibility, I follow Karthik's advice
using the following data structure:
struct fsck_refs_info {
char *ref_checkee;
union {
struct {
char *sub_ref_checkee;
} files;
};
};
Because we use a `struct` here, we could add more fields when we want to
implement more checks without changing the existing functions. And I
move this structure into the "fsck_options". Thus there is no need for
us to change the "error_func" prototype, which makes this series much
clearer.
And Karthik asked me why I do not handle the trailing garbage for
symbolic refs. And I cited my understanding here:
> The "parse_loose_ref_contents" will return the referent. In this function,
> it will skip the prefix "ref:" to get the pointee. If there are some trailing
> garbage, it will be reported by the "files_fsck_symref_target".
> "files_fsck_symref_target" will use "check_refname_format" function
> to check the pointee. For example, if the content is "ref: refs/heads/
> master garbage". The "refs/heads/master garbage" is a bad name.
> However, in my design, the trailing spaces or newline will be ignored,
> I thought we may not report this problem. And I use "strbuf_rtrim" here
> to ignore spaces and newlines.
In my perspective, we should not report this, even as a warning.
CI: https://github.com/shejialuo/git/pull/10
shejialuo (10):
fsck: rename "skiplist" to "skip_oids"
fsck: add a unified interface for reporting fsck messages
fsck: rename objects-related fsck error functions
fsck: add refs-related error report function
refs: set up ref consistency check infrastructure
git refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 31 ++++-
builtin/refs.c | 44 ++++++
fsck.c | 106 +++++++++++---
fsck.h | 74 +++++++---
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 253 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
14 files changed, 746 insertions(+), 51 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v11:
1: a69705b777 = 1: a69705b777 fsck: rename "skiplist" to "skip_oids"
2: 1ef1036348 < -: ---------- fsck: add a unified interface for reporting fsck messages
-: ---------- > 2: a4bfccd938 fsck: add a unified interface for reporting fsck messages
3: d17cf6166e ! 3: 9bc8892761 fsck: rename objects-related fsck error functions
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
-- const char *ref_checkee UNUSED,
-- const char *sub_ref_checkee UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
-+ const char *ref_checkee UNUSED,
-+ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
-- const char *ref_checkee UNUSED,
-- const char *sub_ref_checkee UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
-+ const char *ref_checkee UNUSED,
-+ const char *sub_ref_checkee UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
-- const char *ref_checkee,
-- const char *sub_ref_checkee,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
-+ const char *ref_checkee,
-+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
puts(oid_to_hex(oid));
return 0;
}
-- return fsck_error_function(o, oid, object_type, ref_checkee,
-- sub_ref_checkee, msg_type, msg_id, message);
-+ return fsck_objects_error_function(o, oid, object_type, ref_checkee,
-+ sub_ref_checkee, msg_type, msg_id,
-+ message);
+- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
++ return fsck_objects_error_function(o, oid, object_type,
++ msg_type, msg_id, message);
}
## fsck.h ##
@@ fsck.h: typedef int (*fsck_error)(struct fsck_options *o,
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
-- const char *ref_checkee, const char *sub_ref_checkee,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
-- const char *ref_checkee,
-- const char *sub_ref_checkee,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
-+ const char *ref_checkee, const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
-+ const char *ref_checkee,
-+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
- struct fsck_options {
- fsck_walk_func walk;
+ /*
+ * The information for reporting refs-related error message
@@ fsck.h: struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
4: 19e049ee15 ! 4: 82296dc2b9 fsck: add refs-related error report function
@@ Commit message
fsck: add refs-related error report function
Create refs-specific "error_func" callback "fsck_refs_error_function"
- which could provide the following report messages.
+ which could provide the following report messages for files backend
1. "ref_checkee": "fsck error name": "user message".
2. "ref_checkee.sub_ref_checkee": "fsck error name": "user message".
@@ Commit message
message".
"fsck_refs_error_function" uses the "ref_checkee" and "sub_ref_checkee"
- parameters to indicate the information of the checked refs. For loose
- ref and reflog, it only uses the "ref_checkee" parameter. For packed
+ in the "fsck_refs_info" to indicate the information of the checked refs.
+ For loose ref and reflog, it only uses the "ref_checkee". For packed
refs and reftable refs, when checking the consistency of the file
- itself, it still only uses "ref_checkee" parameter. However, when
- checking the consistency of the ref or reflog contained in the file, it
- will use "sub_ref_checkee" parameter to indicate that we are not
- checking the file but the incorporated ref or reflog.
+ itself, it still only uses "ref_checkee". However, when checking the
+ consistency of the ref or reflog contained in the file ,it will use the
+ "sub_ref_checkee" to indicate that we are not checking the file but the
+ incorporated ref or reflog.
"fsck_refs_error_function" will use the "oid" parameter if the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
-+int fsck_refs_error_function(struct fsck_options *options UNUSED,
++int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
-+ const char *ref_checkee,
-+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct strbuf sb = STRBUF_INIT;
++ struct fsck_refs_info *refs_info = &options->refs_info;
+ int ret = 0;
+
-+ if (sub_ref_checkee)
-+ strbuf_addf(&sb, "%s.%s", ref_checkee, sub_ref_checkee);
-+ else
-+ strbuf_addstr(&sb, ref_checkee);
++ if (the_repository->ref_storage_format == REF_STORAGE_FORMAT_FILES) {
++ strbuf_addstr(&sb, refs_info->ref_checkee);
++ if (refs_info->u.files.sub_ref_checkee)
++ strbuf_addf(&sb, ".%s", refs_info->u.files.sub_ref_checkee);
+
-+ if (oid)
-+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
++ if (oid)
++ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
++ }
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
-+ const char *ref_checkee,
-+ const char *sub_ref_checkee,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
- struct fsck_options {
- fsck_walk_func walk;
+ /*
+ * The information for reporting refs-related error message
@@ fsck.h: struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
5: f175afc37c = 5: c5cac2e318 refs: set up ref consistency check infrastructure
6: e177157faa ! 6: 84d840506e git refs: add verify subcommand
@@ fsck.h: struct fsck_options {
unsigned strict:1;
+ unsigned verbose:1;
enum fsck_msg_type *msg_type;
+ struct fsck_refs_info refs_info;
struct oidset skip_oids;
- struct oidset gitmodules_found;
7: ee0e322f2b = 7: 3fc77ec329 builtin/fsck: add `git-refs verify` child process
8: 6a04fb0170 = 8: 44a75141fa files-backend: add unified interface for refs scanning
9: 7d11836deb ! 9: 4a0d58b07d fsck: add ref name check for files backend
@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
-+ ret = fsck_refs_report(o, NULL, sb.buf, NULL,
++ o->refs_info.ref_checkee = sb.buf;
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
10: ad696852ba ! 10: c529670e54 fsck: add ref content check for files backend
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
-+ ret = fsck_refs_report(o, NULL, refname, NULL,
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
-+ ret = fsck_refs_report(o, NULL, refname, NULL,
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
-+ ret = fsck_refs_report(o, NULL, refname, NULL,
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
++ o->refs_info.ref_checkee = refname.buf;
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
-+ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
-+ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ pointee_path.buf);
+ goto clean;
+ } else {
-+ /*
-+ * Only regular refs could have a trailing garbage. Should
-+ * be reported as a warning.
-+ */
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
-+ ret = fsck_refs_report(o, NULL, refname.buf, NULL,
++ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 01/10] fsck: rename "skiplist" to "skip_oids"
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
@ 2024-07-20 9:27 ` shejialuo
2024-07-20 9:27 ` [GSoC][PATCH v12 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
` (9 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency check, the "skiplist" name is too
general which will make the caller think "skiplist" is related to both
the refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
2024-07-20 9:27 ` [GSoC][PATCH v12 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-07-20 9:27 ` shejialuo
2024-07-23 8:35 ` Karthik Nayak
2024-07-20 9:27 ` [GSoC][PATCH v12 03/10] fsck: rename objects-related fsck error functions shejialuo
` (8 subsequent siblings)
10 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking fsck
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
can report either objects or refs, create a new function "fsck_vreport"
following the "report" prototype. Instead of using "...", provide
"va_list" to allow more flexibility.
In order to provide an extensible error report for refs, add a new
"fsck_refs_info" structure to incorporate a union for supporting
different checks for files backend and reftable backend. Then
incorporate this structure into the "fsck_options" to reuse the
"error_func" callback.
Then, change "report" function to use "fsck_vreport" to report objects
related messages. Add a new function called "fsck_refs_report" to use
"fsck_vreport" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 46 +++++++++++++++++++++++++++++++++++++++-------
fsck.h | 26 +++++++++++++++++++++++++-
2 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/fsck.c b/fsck.c
index 3f32441492..f2417e65fd 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,17 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,8 +255,8 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
result = options->error_func(options, oid, object_type,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
@@ -260,6 +265,33 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+
+ va_start(ap, fmt);
+ result = fsck_vreport(options, oid, object_type, msg_id, fmt, ap);
+ va_end(ap);
+
+ return result;
+}
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_vreport(options, oid, OBJ_NONE, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..a2ecb39b51 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,7 +114,9 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
@@ -131,11 +133,24 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_id msg_id,
const char *message);
+/*
+ * The information for reporting refs-related error message
+ */
+struct fsck_refs_info {
+ char *ref_checkee;
+ union {
+ struct {
+ char *sub_ref_checkee;
+ } files;
+ } u;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
+ struct fsck_refs_info refs_info;
struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
@@ -209,6 +224,15 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 4, 5)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v12 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-20 9:27 ` [GSoC][PATCH v12 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-23 8:35 ` Karthik Nayak
0 siblings, 0 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-07-23 8:35 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 1514 bytes --]
shejialuo <shejialuo@gmail.com> writes:
[snip]
> diff --git a/fsck.h b/fsck.h
> index bcfb2e34cd..a2ecb39b51 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -114,7 +114,9 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
> typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
> void *data, struct fsck_options *options);
>
> -/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
> +/*
> + * callback function for reporting errors when checking either objects or refs
> + */
> typedef int (*fsck_error)(struct fsck_options *o,
> const struct object_id *oid, enum object_type object_type,
> enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
> @@ -131,11 +133,24 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
> enum fsck_msg_id msg_id,
> const char *message);
>
> +/*
> + * The information for reporting refs-related error message
> + */
> +struct fsck_refs_info {
> + char *ref_checkee;
> + union {
> + struct {
> + char *sub_ref_checkee;
> + } files;
> + } u;
> +};
> +
When I suggested using a struct, it was to replace sending both
'ref_checkee' and 'sub_ref_checkee' to the error functions. I.e to send
a single struct instead of two different fields. But now, we've included
it in the 'fsck_options' struct, I don't think it should be part of the
options. Because the fsck_options is used beyond the error function
while 'fsck_refs_info' is only needed in the error functions, right?
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 03/10] fsck: rename objects-related fsck error functions
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
2024-07-20 9:27 ` [GSoC][PATCH v12 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-07-20 9:27 ` [GSoC][PATCH v12 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-20 9:27 ` shejialuo
2024-07-20 9:28 ` [GSoC][PATCH v12 04/10] fsck: add refs-related error report function shejialuo
` (7 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of objects-related fsck error functions are general. It's OK
when there is only object database check. However, we have introduced
refs database check report function. To avoid ambiguity, rename
object-related fsck error functions to explicitly indicate these
functions are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 14 +++++++-------
fsck.c | 27 ++++++++++++++-------------
fsck.h | 26 +++++++++++++-------------
3 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..6d86bbe1e9 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,12 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +938,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index f2417e65fd..9a7e3d8679 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1232,12 +1232,12 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1335,16 +1335,17 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index a2ecb39b51..6411437334 100644
--- a/fsck.h
+++ b/fsck.h
@@ -122,16 +122,16 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
/*
* The information for reporting refs-related error message
@@ -165,7 +165,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -173,7 +173,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -181,7 +181,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 04/10] fsck: add refs-related error report function
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-07-20 9:27 ` [GSoC][PATCH v12 03/10] fsck: rename objects-related fsck error functions shejialuo
@ 2024-07-20 9:28 ` shejialuo
2024-07-20 9:28 ` [GSoC][PATCH v12 05/10] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:28 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Create a refs-specific "error_func" callback, "fsck_refs_error_function",
which can produce the following report messages for the files backend:
1. "ref_checkee": "fsck error name": "user message".
2. "ref_checkee.sub_ref_checkee": "fsck error name": "user message".
3. "ref_checkee -> (oid hex)": "fsck error name": "user message".
4. "ref_checkee.sub_ref_checkee -> (oid hex)": "fsck error name": "user
message".
"fsck_refs_error_function" uses the "ref_checkee" and "sub_ref_checkee"
in the "fsck_refs_info" to identify the ref being checked. For loose
refs and reflogs, it only uses "ref_checkee". For packed refs and
reftable refs, when checking the consistency of the file itself, it
still only uses "ref_checkee". However, when checking the consistency
of a ref or reflog contained in the file, it uses "sub_ref_checkee" to
indicate that we are not checking the file but the ref or reflog it
contains.
"fsck_refs_error_function" will use the "oid" parameter if the caller
passes one: it uses "oid_to_hex" to get the corresponding hex value and
includes it in the reported message.
Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 29 +++++++++++++++++++++++++++++
fsck.h | 13 +++++++++++++
2 files changed, 42 insertions(+)
diff --git a/fsck.c b/fsck.c
index 9a7e3d8679..e87b13fdc3 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1247,6 +1247,35 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct strbuf sb = STRBUF_INIT;
+ struct fsck_refs_info *refs_info = &options->refs_info;
+ int ret = 0;
+
+ if (the_repository->ref_storage_format == REF_STORAGE_FORMAT_FILES) {
+ strbuf_addstr(&sb, refs_info->ref_checkee);
+ if (refs_info->u.files.sub_ref_checkee)
+ strbuf_addf(&sb, ".%s", refs_info->u.files.sub_ref_checkee);
+
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+ }
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 6411437334..a3870ffe2b 100644
--- a/fsck.h
+++ b/fsck.h
@@ -132,6 +132,12 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
/*
* The information for reporting refs-related error message
@@ -183,6 +189,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 05/10] refs: set up ref consistency check infrastructure
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-07-20 9:28 ` [GSoC][PATCH v12 04/10] fsck: add refs-related error report function shejialuo
@ 2024-07-20 9:28 ` shejialuo
2024-07-20 9:28 ` [GSoC][PATCH v12 06/10] git refs: add verify subcommand shejialuo
` (5 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:28 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The interfaces defined in `ref_storage_be` are carefully grouped by
semantics. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". It does not fit into any of
the five categories above, so explicitly add a blank line to set it
apart from the others.
Last, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 15 ++++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..d89eeda8ef 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,17 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return ret;
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3445,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 06/10] git refs: add verify subcommand
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-07-20 9:28 ` [GSoC][PATCH v12 05/10] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-20 9:28 ` shejialuo
2024-07-20 9:28 ` [GSoC][PATCH v12 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:28 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency. This subcommand will also be
used as the entry point for checking refs from git-fsck(1). Last, add a
"verbose" field to "fsck_options" to indicate whether we should print
verbose messages when checking refs and objects consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
fsck.h | 1 +
3 files changed, 58 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable more strict checking, every WARN severity for the `Fsck Messages`
+ be seen as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..4831c9e28e 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "skip_oids" set
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.skip_oids);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.h b/fsck.h
index a3870ffe2b..98e2225593 100644
--- a/fsck.h
+++ b/fsck.h
@@ -155,6 +155,7 @@ struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose:1;
enum fsck_msg_type *msg_type;
struct fsck_refs_info refs_info;
struct oidset skip_oids;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 07/10] builtin/fsck: add `git-refs verify` child process
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-07-20 9:28 ` [GSoC][PATCH v12 06/10] git refs: add verify subcommand shejialuo
@ 2024-07-20 9:28 ` shejialuo
2024-07-20 9:28 ` [GSoC][PATCH v12 08/10] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:28 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 6d86bbe1e9..6cdff55fd6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -896,6 +896,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1065,6 +1080,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 08/10] files-backend: add unified interface for refs scanning
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-07-20 9:28 ` [GSoC][PATCH v12 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-20 9:28 ` shejialuo
2024-07-20 9:29 ` [GSoC][PATCH v12 09/10] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:28 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories
to check every regular file or symbolic link, and both scans follow the
same pattern. Introduce a unified interface for scanning directories in
the files backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 77 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index d89eeda8ef..794e9f3f2e 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,6 +3409,78 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
@@ -3415,7 +3488,9 @@ static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
+ | files_fsck_refs(ref_store, o);
+
return ret;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 09/10] fsck: add ref name check for files backend
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-07-20 9:28 ` [GSoC][PATCH v12 08/10] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-20 9:29 ` shejialuo
2024-07-20 9:29 ` [GSoC][PATCH v12 10/10] fsck: add ref content " shejialuo
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:29 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully check
refs with badly formatted names, such as a standalone "@" or a name
ending with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new test script to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 21 +++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 126 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..dab4012246 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has a bad name.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index 98e2225593..421ec62b8f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 794e9f3f2e..f93c44c4d1 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,26 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ o->refs_info.ref_checkee = sb.buf;
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3490,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v12 10/10] fsck: add ref content check for files backend
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-07-20 9:29 ` [GSoC][PATCH v12 09/10] fsck: add ref name check for files backend shejialuo
@ 2024-07-20 9:29 ` shejialuo
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
10 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-20 9:29 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic links and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, a symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert a
relative path to an absolute path. Then use "skip_prefix" to strip the
gitdir prefix so that the result aligns with the symref "referent".
Thus, symrefs and symbolic links can share the same interface. Add
a new function "files_fsck_symref_target" which checks the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is a valid refname.
3. whether the pointee path has a wrong file type in the filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 142 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 266 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index dab4012246..b1630a478b 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has a bad name.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index 421ec62b8f..adc88e01c7 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index f93c44c4d1..d8712269f1 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3439,6 +3444,138 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For symbolic ref, "pointee_name"
+ * would be the content after "ref:". For symbolic link, "pointee_name" would
+ * be the relative path against "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target out of refs hierarchy");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid refname");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to invalid target");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+ o->refs_info.ref_checkee = refname.buf;
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
+ pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
+ pointee_path.buf);
+ goto clean;
+ } else {
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3491,6 +3628,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..35bf40ee64 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 00/10] ref consistency check infra setup
2024-07-20 9:25 ` [GSoC][PATCH v12 00/10] ref consistency check infra setup shejialuo
` (9 preceding siblings ...)
2024-07-20 9:29 ` [GSoC][PATCH v12 10/10] fsck: add ref content " shejialuo
@ 2024-07-29 13:22 ` shejialuo
2024-07-29 13:26 ` [GSoC][PATCH v13 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
` (11 more replies)
10 siblings, 12 replies; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:22 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version mainly focuses on how we should provide
extensibility for reporting problems with refs. In patch v10, Junio
asked the following question:
> The error reporting function for refs consistency check was still
> about reporting a problem for a single ref. I am wondering how
> consistency violations that are not about a single ref should be
> handled. For example, if refs/packed-backend.c:packed_fsck() finds
> that the file is not sorted properly or has some unparseable garbage
> in it, it is not something you can report as "refs/heads/main is
> broken", but those who are interested in seeing the "reference
> database consistency" verified, it is very much what they want the
> tool to notice. How would detection of such a breakage that is not
> attributed to a single ref fit in this "ref consistency check
> infrastructure" that was introduced by [05/10]?
Actually, I think that the original parameter "checked_ref_name" is a
bad name which makes the reader think that "we only handle refs". This
design flaw is my fault. However, I misunderstood Junio's
words and made things complicated.
Patches v11 and v12 tried to solve the problem of providing
extensibility for reporting refs problems. However, these two versions
still made things complicated. After an offline meeting with Patrick and
Karthik, we designed the following simple flat data structure:
struct fsck_refs_info {
const char *path;
};
It is simple and provides extensibility.
Thanks,
Jialuo
shejialuo (10):
fsck: rename "skiplist" to "skip_oids"
fsck: add a unified interface for reporting fsck messages
fsck: rename objects-related fsck error functions
fsck: add refs-related error report function
refs: set up ref consistency check infrastructure
git refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
fsck: add ref content check for files backend
Documentation/fsck-msgids.txt | 12 ++
Documentation/git-refs.txt | 13 ++
builtin/fsck.c | 32 ++++-
builtin/mktag.c | 1 +
builtin/refs.c | 44 ++++++
fsck.c | 109 +++++++++++----
fsck.h | 74 +++++++---
object-file.c | 11 +-
refs.c | 7 +-
refs.h | 8 ++
refs/debug.c | 11 ++
refs/files-backend.c | 251 +++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 ++
refs/refs-internal.h | 11 +-
refs/reftable-backend.c | 8 ++
t/t0602-reffiles-fsck.sh | 211 ++++++++++++++++++++++++++++
16 files changed, 754 insertions(+), 57 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v12:
1: a69705b777 = 1: a69705b777 fsck: rename "skiplist" to "skip_oids"
2: a4bfccd938 < -: ---------- fsck: add a unified interface for reporting fsck messages
-: ---------- > 2: 178329d085 fsck: add a unified interface for reporting fsck messages
3: 9bc8892761 ! 3: 341cb841bf fsck: rename objects-related fsck error functions
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
+- const struct fsck_refs_info *refs_info UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const struct fsck_refs_info *refs_info UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
+- const struct fsck_refs_info *refs_info UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
++ const struct fsck_refs_info *refs_info UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+- const struct fsck_refs_info *refs_info,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
puts(oid_to_hex(oid));
return 0;
}
-- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
-+ return fsck_objects_error_function(o, oid, object_type,
+- return fsck_error_function(o, oid, object_type, refs_info,
+- msg_type, msg_id, message);
++ return fsck_objects_error_function(o, oid, object_type, refs_info,
+ msg_type, msg_id, message);
}
@@ fsck.h: typedef int (*fsck_error)(struct fsck_options *o,
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+- const struct fsck_refs_info *refs_info,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+- const struct fsck_refs_info *refs_info,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
++ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
4: 82296dc2b9 ! 4: b328f3b218 fsck: add refs-related error report function
@@ Metadata
## Commit message ##
fsck: add refs-related error report function
- Create refs-specific "error_func" callback "fsck_refs_error_function"
- which could provide the following report messages for files backend
+ Add refs-related options to the "fsck_options", create refs-specific
+ "error_func" callback "fsck_refs_error_function".
- 1. "ref_checkee": "fsck error name": "user message".
- 2. "ref_checkee.sub_ref_checkee": "fsck error name": "user message".
- 3. "ref_checkee -> (oid hex)": "fsck error name": "user message".
- 4. "ref_checkee.sub_ref_checkee -> (oid hex)": "fsck error name": "user
- message".
-
- "fsck_refs_error_function" uses the "ref_checkee" and "sub_ref_checkee"
- in the "fsck_refs_info" to indicate the information of the checked refs.
- For loose ref and reflog, it only uses the "ref_checkee". For packed
- refs and reftable refs, when checking the consistency of the file
- itself, it still only uses "ref_checkee". However, when checking the
- consistency of the ref or reflog contained in the file ,it will use the
- "sub_ref_checkee" to indicate that we are not checking the file but the
- incorporated ref or reflog.
-
- "fsck_refs_error_function" will use the "oid" parameter if the caller
+ "fsck_refs_error_function" will use the "oid" parameter. When the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
++ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct strbuf sb = STRBUF_INIT;
-+ struct fsck_refs_info *refs_info = &options->refs_info;
+ int ret = 0;
+
-+ if (the_repository->ref_storage_format == REF_STORAGE_FORMAT_FILES) {
-+ strbuf_addstr(&sb, refs_info->ref_checkee);
-+ if (refs_info->u.files.sub_ref_checkee)
-+ strbuf_addf(&sb, ".%s", refs_info->u.files.sub_ref_checkee);
++ strbuf_addstr(&sb, refs_info->path);
+
-+ if (oid)
-+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
-+ }
++ if (oid)
++ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
@@ fsck.h: int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
++ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
5: c5cac2e318 ! 5: 481bffac6e refs: set up ref consistency check infrastructure
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
-+ int ret;
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
-+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
-+ return ret;
++ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+}
+
struct ref_storage_be refs_be_files = {
6: 84d840506e ! 6: cea8908338 git refs: add verify subcommand
@@ fsck.h: struct fsck_options {
unsigned strict:1;
+ unsigned verbose:1;
enum fsck_msg_type *msg_type;
- struct fsck_refs_info refs_info;
struct oidset skip_oids;
+ struct oidset gitmodules_found;
7: 3fc77ec329 = 7: 2200167bf6 builtin/fsck: add `git-refs verify` child process
8: 44a75141fa ! 8: c69883be9a files-backend: add unified interface for refs scanning
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
-+ int ret;
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
-+ ret = files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
++ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
-+ return ret;
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
-@@ refs/files-backend.c: static int files_fsck(struct ref_store *ref_store,
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
-- ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
-+ ret = refs->packed_ref_store->be->fsck(refs->packed_ref_store, o)
-+ | files_fsck_refs(ref_store, o);
-+
- return ret;
+- return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
++ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
++ files_fsck_refs(ref_store, o);
}
+ struct ref_storage_be refs_be_files = {
9: 4a0d58b07d ! 9: 5ce0e7367e fsck: add ref name check for files backend
@@ Documentation/fsck-msgids.txt
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
-+ (ERROR) A ref has a bad name.
++ (ERROR) A ref has an invalid format.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
++ struct fsck_refs_info info;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
-+ o->refs_info.ref_checkee = sb.buf;
-+ ret = fsck_refs_report(o, NULL,
++ info.path = sb.buf;
++ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
@@ refs/files-backend.c: typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
struct fsck_options *o,
const char *refs_check_dir,
@@ refs/files-backend.c: static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
{
- int ret;
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
10: c529670e54 ! 10: f77ca18c68 fsck: add ref content check for files backend
@@ Documentation/fsck-msgids.txt
+ (ERROR) A ref has a bad content.
+
`badRefName`::
- (ERROR) A ref has a bad name.
+ (ERROR) A ref has an invalid format.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ * be the relative path agaignst "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
++ struct fsck_refs_info *info,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
-+ ret = fsck_refs_report(o, NULL,
++ ret = fsck_refs_report(o, NULL, info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
-+ "point to target out of refs hierarchy");
++ "points to ref outside the refs directory");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
-+ ret = fsck_refs_report(o, NULL,
++ ret = fsck_refs_report(o, NULL, info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
-+ "point to invalid refname");
++ "points to refname with invalid format");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
-+ ret = fsck_refs_report(o, NULL,
++ ret = fsck_refs_report(o, NULL, info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
-+ "point to invalid target");
++ "points to an invalid file type");
+ goto out;
+ }
+out:
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
++ struct fsck_refs_info info;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
-+ o->refs_info.ref_checkee = refname.buf;
++ info.path = refname.buf;
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
-+ ret = fsck_refs_report(o, NULL,
++ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
-+ ret = files_fsck_symref_target(o, refname.buf, pointee_name,
-+ pointee_path.buf);
++ ret = files_fsck_symref_target(o, &info, refname.buf,
++ pointee_name, pointee_path.buf);
+ goto clean;
+ }
+
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
-+ ret = fsck_refs_report(o, NULL,
++ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
-+ ret = files_fsck_symref_target(o, refname.buf, referent.buf,
-+ pointee_path.buf);
++ ret = files_fsck_symref_target(o, &info, refname.buf,
++ referent.buf, pointee_path.buf);
+ goto clean;
+ } else {
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
-+ ret = fsck_refs_report(o, NULL,
++ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
@@ refs/files-backend.c: static int files_fsck_refs_name(struct fsck_options *o,
struct fsck_options *o,
const char *refs_check_dir,
@@ refs/files-backend.c: static int files_fsck_refs(struct ref_store *ref_store,
- int ret;
+ {
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
@@ t/t0602-reffiles-fsck.sh: test_expect_success 'ref name check should be adapted
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
-+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname
++ error: refs/heads/branch-2-bad: badSymrefPointee: points to refname with invalid format
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
@@ t/t0602-reffiles-fsck.sh: test_expect_success 'ref name check should be adapted
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
-+ error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target
++ error: refs/heads/branch-2-bad: badSymrefPointee: points to an invalid file type
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
@@ t/t0602-reffiles-fsck.sh: test_expect_success 'ref name check should be adapted
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
-+ error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy
++ error: refs/heads/branch-2-bad: badSymrefPointee: points to ref outside the refs directory
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 01/10] fsck: rename "skiplist" to "skip_oids"
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
@ 2024-07-29 13:26 ` shejialuo
2024-07-29 13:26 ` [GSoC][PATCH v13 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
` (10 subsequent siblings)
11 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:26 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency checks, the "skiplist" name is too
general, which will make the caller think "skiplist" is related to both
refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
2024-07-29 13:26 ` [GSoC][PATCH v13 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-07-29 13:26 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:26 ` [GSoC][PATCH v13 03/10] fsck: rename objects-related fsck error functions shejialuo
` (9 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:26 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking fsck
error type and calling the callback "error_func" to report the message.
However, "report" function is only related to object database which
cannot be reused for refs. In order to provide a unified interface which
can report either objects or refs, create a new function "fsck_vreport"
following the "report" prototype. Instead of using "...", provide
"va_list" to allow more flexibility.
In order to provide an extensible error report for refs, add a new
"fsck_refs_info" structure and add a "const struct fsck_refs_info *"
parameter to the "fsck_vreport" function.
Like "report", the "fsck_vreport" function will use "error_func"
registered in "fsck_options" to report customized messages. Change
"error_func" prototype to align with the new "fsck_vreport".
Then, change "report" function to use "fsck_vreport" to report objects
related messages. Add a new function called "fsck_refs_report" to use
"fsck_vreport" to report refs related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 1 +
builtin/mktag.c | 1 +
fsck.c | 56 +++++++++++++++++++++++++++++++++++++++++--------
fsck.h | 25 +++++++++++++++++++++-
object-file.c | 11 +++++-----
5 files changed, 79 insertions(+), 15 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..6abad60e7e 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -92,6 +92,7 @@ static int objerror(struct object *obj, const char *err)
static int fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid,
enum object_type object_type,
+ const struct fsck_refs_info *refs_info UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..6496deca0a 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -20,6 +20,7 @@ static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
const struct object_id *oid UNUSED,
enum object_type object_type UNUSED,
+ const struct fsck_refs_info *refs_info UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 3f32441492..1185e9a8ad 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide a unified interface for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, oid, object_type, refs_info,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -260,6 +266,35 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+
+ va_start(ap, fmt);
+ result = fsck_vreport(options, oid, object_type, NULL, msg_id, fmt, ap);
+ va_end(ap);
+
+ return result;
+}
+
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_vreport(options, oid, OBJ_NONE, refs_info,
+ msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1203,6 +1238,7 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
int fsck_error_function(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type UNUSED,
+ const struct fsck_refs_info *refs_info UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
@@ -1306,6 +1342,7 @@ int git_fsck_config(const char *var, const char *value,
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
@@ -1314,5 +1351,6 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_error_function(o, oid, object_type, refs_info,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..4f01a46cc7 100644
--- a/fsck.h
+++ b/fsck.h
@@ -92,6 +92,7 @@ enum fsck_msg_id {
};
#undef MSG_ID
+struct fsck_refs_info;
struct fsck_options;
struct object;
@@ -114,23 +115,35 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * callback function for reporting errors when checking either objects or refs
+ */
typedef int (*fsck_error)(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_error_function(struct fsck_options *o,
const struct object_id *oid, enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+/*
+ * The information for reporting refs-related error message
+ */
+struct fsck_refs_info {
+ const char *path;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
@@ -209,6 +222,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 5, 6)))
+int fsck_refs_report(struct fsck_options *options,
+ const struct object_id *oid,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
/*
* Subsystem for storing human-readable names for each object.
*
diff --git a/object-file.c b/object-file.c
index 065103be3e..91ddab2696 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,12 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ const struct object_id *oid UNUSED,
+ enum object_type object_type UNUSED,
+ const struct fsck_refs_info *refs_info UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-29 13:26 ` [GSoC][PATCH v13 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 14:56 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 3978 bytes --]
On Mon, Jul 29, 2024 at 09:26:30PM +0800, shejialuo wrote:
> The static function "report" provided by "fsck.c" aims at checking fsck
> error type and calling the callback "error_func" to report the message.
> However, "report" function is only related to object database which
> cannot be reused for refs.
Nit: it would be nice to mention _why_ it cannot be reused for refs.
> diff --git a/fsck.c b/fsck.c
> index 3f32441492..1185e9a8ad 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
> return opts && oid && oidset_contains(&opts->skip_oids, oid);
> }
>
> -__attribute__((format (printf, 5, 6)))
> -static int report(struct fsck_options *options,
> - const struct object_id *oid, enum object_type object_type,
> - enum fsck_msg_id msg_id, const char *fmt, ...)
> +/*
> + * Provide a unified interface for either fscking refs or objects.
> + * It will get the current msg error type and call the error_func callback
> + * which is registered in the "fsck_options" struct.
> + */
> +static int fsck_vreport(struct fsck_options *options,
> + const struct object_id *oid,
> + enum object_type object_type,
> + const struct fsck_refs_info *refs_info,
> + enum fsck_msg_id msg_id, const char *fmt, va_list ap)
> {
> - va_list ap;
> + va_list ap_copy;
> struct strbuf sb = STRBUF_INIT;
> enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
> int result;
It is a bit weird that this new generic function receives non-generic
inputs which are specific to the respective subsystems (objects or refs)
that we are checking.
A better design would likely be to make `error_func()` receive a void
pointer and then have the respective subsystems provide a function that
knows how to format the message while receiving either a
`struct fsck_object_report *` or a `struct fsck_ref_report *`.
I don't think this is particularly worrisome though as it is still
manageable right now. So I'm fine if we want to leave this as-is, and
then we can iterate on this in a future patch series as required.
> @@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
> prepare_msg_ids();
> strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
>
> - va_start(ap, fmt);
> - strbuf_vaddf(&sb, fmt, ap);
> - result = options->error_func(options, oid, object_type,
> + va_copy(ap_copy, ap);
> + strbuf_vaddf(&sb, fmt, ap_copy);
> + result = options->error_func(options, oid, object_type, refs_info,
> msg_type, msg_id, sb.buf);
> strbuf_release(&sb);
> va_end(ap);
> @@ -260,6 +266,35 @@ static int report(struct fsck_options *options,
> return result;
> }
>
> +__attribute__((format (printf, 5, 6)))
> +static int report(struct fsck_options *options,
> + const struct object_id *oid, enum object_type object_type,
> + enum fsck_msg_id msg_id, const char *fmt, ...)
> +{
> + va_list ap;
> + int result;
> +
> + va_start(ap, fmt);
> + result = fsck_vreport(options, oid, object_type, NULL, msg_id, fmt, ap);
> + va_end(ap);
> +
> + return result;
> +}
As far as I can see, `report()` is now specific to reporting errors with
objects while `fsck_vreport()` is the generic part. Do we want to rename
the function to `fsck_report_object()` to clarify, or would that cause
too much churn?
Hm. Seeing that we have 62 callsites of that function it may be too much
churn indeed.
> +int fsck_refs_report(struct fsck_options *options,
> + const struct object_id *oid,
> + const struct fsck_refs_info *refs_info,
> + enum fsck_msg_id msg_id, const char *fmt, ...)
Would `fsck_report_ref()` be a better name?
What is the intent of the `oid` field? Would it be set to the object ID
that a reference points to? What if the reference is a non-resolving
symbolic reference? I wonder whether we can just remove it.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 02/10] fsck: add a unified interface for reporting fsck messages
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 14:56 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 14:56 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Tue, Jul 30, 2024 at 10:31:16AM +0200, Patrick Steinhardt wrote:
> On Mon, Jul 29, 2024 at 09:26:30PM +0800, shejialuo wrote:
> > The static function "report" provided by "fsck.c" aims at checking fsck
> > error type and calling the callback "error_func" to report the message.
> > However, "report" function is only related to object database which
> > cannot be reused for refs.
>
> Nit: it would be nice to mention _why_ it cannot be reused for refs.
>
> > diff --git a/fsck.c b/fsck.c
> > index 3f32441492..1185e9a8ad 100644
> > --- a/fsck.c
> > +++ b/fsck.c
> > @@ -226,12 +226,18 @@ static int object_on_skiplist(struct fsck_options *opts,
> > return opts && oid && oidset_contains(&opts->skip_oids, oid);
> > }
> >
> > -__attribute__((format (printf, 5, 6)))
> > -static int report(struct fsck_options *options,
> > - const struct object_id *oid, enum object_type object_type,
> > - enum fsck_msg_id msg_id, const char *fmt, ...)
> > +/*
> > + * Provide a unified interface for either fscking refs or objects.
> > + * It will get the current msg error type and call the error_func callback
> > + * which is registered in the "fsck_options" struct.
> > + */
> > +static int fsck_vreport(struct fsck_options *options,
> > + const struct object_id *oid,
> > + enum object_type object_type,
> > + const struct fsck_refs_info *refs_info,
> > + enum fsck_msg_id msg_id, const char *fmt, va_list ap)
> > {
> > - va_list ap;
> > + va_list ap_copy;
> > struct strbuf sb = STRBUF_INIT;
> > enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
> > int result;
>
> It is a bit weird that this new generic function receives non-generic
> inputs which are specific to the respective subsystems (objects or refs)
> that we are checking.
>
Actually, this is one of the biggest problems when implementing the
infrastructure. The original function "report" only cares about
reporting problems with objects. So the callback "error_func" uses a
similar prototype.
The problem comes when we want to add ref-related reports. In my earliest
implementation, I just created a new function "fsck_refs_report" which
copied some code from "report" and defined a refs-related callback.
However, this is a bad approach because it duplicates code. If we want to
reuse the "report" function, we should add new parameters to "report"
and "error_func". This is the idea of this patch. However, as you can
see, there are so many "report" function calls in the codebase that it
would be bad to change them all. So I define a more generic function
called "fsck_vreport" and wrap "report" so that it eventually calls it.
> A better design would likely be to make `error_func()` receive a void
> pointer such that `error_func()` and then have the respective subsystems
> provide a function that knows to format the message while receiving
> either a `struct fsck_object_report *` or a `struct fsck_ref_report *`.
>
Yes, I agree with this idea. And I think we should use only one function
called "fsck_reportf" to report any fsck-related messages. We could
design the following callback "prototype".
typedef int (*fsck_error)(struct fsck_options *o,
void *info,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
Thus, we could make "fsck_reportf" generic. It will handle the common
"fsck_options" and "enum fsck_msg_id" and then it will call "fsck_error"
callback. The user could pass either refs information or objects
information.
> I don't think this is particularly worriesome though as it is still
> manageable right now. So I'm fine if we want to leave this as-is, and
> then we can iterate on this in a future patch series as required.
>
I strongly suggest that we should use the above design for the following
reasons:
1. We only expose one interface called "fsck_reportf" which will make
the code clear. Actually, there is no difference between reporting refs
and reporting objects.
2. We provide more extensibility here, because we will never change
"fsck_reportf" and "fsck_error" prototype when we want to add more info
for either refs or objects.
But do we really need this? Junio, could you please give some advice
here? What do you think about this design? From my perspective, the only
overhead here is that there are too many "report" calls that we would
need to refactor.
> > @@ -250,9 +256,9 @@ static int report(struct fsck_options *options,
> > prepare_msg_ids();
> > strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
> >
> > - va_start(ap, fmt);
> > - strbuf_vaddf(&sb, fmt, ap);
> > - result = options->error_func(options, oid, object_type,
> > + va_copy(ap_copy, ap);
> > + strbuf_vaddf(&sb, fmt, ap_copy);
> > + result = options->error_func(options, oid, object_type, refs_info,
> > msg_type, msg_id, sb.buf);
> > strbuf_release(&sb);
> > va_end(ap);
> > @@ -260,6 +266,35 @@ static int report(struct fsck_options *options,
> > return result;
> > }
> >
> > +__attribute__((format (printf, 5, 6)))
> > +static int report(struct fsck_options *options,
> > + const struct object_id *oid, enum object_type object_type,
> > + enum fsck_msg_id msg_id, const char *fmt, ...)
> > +{
> > + va_list ap;
> > + int result;
> > +
> > + va_start(ap, fmt);
> > + result = fsck_vreport(options, oid, object_type, NULL, msg_id, fmt, ap);
> > + va_end(ap);
> > +
> > + return result;
> > +}
>
> As far as I can see, `report()` is now specific to reporting errors with
> objects while `fsck_vreport()` is the generic part. Do we want to rename
> the function to `fsck_report_object()` to clarify, or would that cause
> too much churn?
>
> Hm. Seeing that we have 62 callsites of that function it may be too much
> churn indeed.
>
Yes, there are too many references for "report" function. That's why I
wrap the "report" using "fsck_vreport".
> > +int fsck_refs_report(struct fsck_options *options,
> > + const struct object_id *oid,
> > + const struct fsck_refs_info *refs_info,
> > + enum fsck_msg_id msg_id, const char *fmt, ...)
>
> Would `fsck_report_ref()` be a better name?
>
I agree. However, if we use the above design, we will just use
"fsck_reportf" here both for refs and objects.
> What is the intent of the `oid` field? Would it be set to the object ID
> that a reference points to? What if the reference is a non-resolving
> symbolic reference? I wonder whether we can just remove it.
>
`oid` is meant to be the object ID that a reference points to. If the
reference is a symbolic link or symref, we do not care about it and the
caller should just pass `NULL`. Actually, we may not use this field.
I just thought that we could provide the user with more information: when
using "files-backend.c::parse_loose_ref_contents()" we will
automatically get the `oid` if the ref is a regular reference, so I just
provide `oid` here.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 03/10] fsck: rename objects-related fsck error functions
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
2024-07-29 13:26 ` [GSoC][PATCH v13 01/10] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-07-29 13:26 ` [GSoC][PATCH v13 02/10] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-07-29 13:26 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:26 ` [GSoC][PATCH v13 04/10] fsck: add refs-related error report function shejialuo
` (8 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:26 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of objects-related fsck error functions are general. It's OK
when there is only object database check. However, we have introduced
refs database check report function. To avoid ambiguity, rename
object-related fsck error functions to explicitly indicate these
functions are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 16 ++++++++--------
fsck.c | 32 ++++++++++++++++----------------
fsck.h | 30 +++++++++++++++---------------
3 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 6abad60e7e..8553e2bc1b 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,13 +89,13 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- const struct fsck_refs_info *refs_info UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const struct fsck_refs_info *refs_info UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -939,7 +939,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index 1185e9a8ad..af61fa90ba 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1235,13 +1235,13 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- const struct fsck_refs_info *refs_info UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const struct fsck_refs_info *refs_info UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1339,18 +1339,18 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- const struct fsck_refs_info *refs_info,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, refs_info,
- msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type, refs_info,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 4f01a46cc7..f53ac339d2 100644
--- a/fsck.h
+++ b/fsck.h
@@ -124,18 +124,18 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- const struct fsck_refs_info *refs_info,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- const struct fsck_refs_info *refs_info,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
/*
* The information for reporting refs-related error message
@@ -163,7 +163,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -171,7 +171,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -179,7 +179,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 03/10] fsck: rename objects-related fsck error functions
2024-07-29 13:26 ` [GSoC][PATCH v13 03/10] fsck: rename objects-related fsck error functions shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 14:59 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 574 bytes --]
On Mon, Jul 29, 2024 at 09:26:40PM +0800, shejialuo wrote:
> The names of objects-related fsck error functions are general. It's OK
s/general/generic, I guess.
> when there is only object database check. However, we have introduced
> refs database check report function. To avoid ambiguity, rename
> object-related fsck error functions to explicitly indicate these
> functions are used to report objects-related messages.
I agree that it is sensible to rename the functions to make it more
explicit that they are only used by the object-checking infrastructure.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 03/10] fsck: rename objects-related fsck error functions
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 14:59 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 14:59 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Tue, Jul 30, 2024 at 10:31:21AM +0200, Patrick Steinhardt wrote:
> On Mon, Jul 29, 2024 at 09:26:40PM +0800, shejialuo wrote:
> > The names of objects-related fsck error functions are general. It's OK
>
> s/general/generic, I guess.
>
I will improve this in the next version.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 04/10] fsck: add refs-related error report function
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (2 preceding siblings ...)
2024-07-29 13:26 ` [GSoC][PATCH v13 03/10] fsck: rename objects-related fsck error functions shejialuo
@ 2024-07-29 13:26 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:27 ` [GSoC][PATCH v13 05/10] refs: set up ref consistency check infrastructure shejialuo
` (7 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:26 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Add refs-related options to the "fsck_options", create refs-specific
"error_func" callback "fsck_refs_error_function".
"fsck_refs_error_function" will use the "oid" parameter. When the caller
passes the oid, it will use "oid_to_hex" to get the corresponding hex
value to report to the caller.
Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
macros to create refs options easily.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 25 +++++++++++++++++++++++++
fsck.h | 14 ++++++++++++++
2 files changed, 39 insertions(+)
diff --git a/fsck.c b/fsck.c
index af61fa90ba..56de29b4c0 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1251,6 +1251,31 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type UNUSED,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ strbuf_addstr(&sb, refs_info->path);
+
+ if (oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index f53ac339d2..a4a4ba88ee 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,6 +136,13 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ const struct object_id *oid,
+ enum object_type object_type,
+ const struct fsck_refs_info *refs_info,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
/*
* The information for reporting refs-related error message
@@ -181,6 +188,13 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
+#define FSCK_REFS_OPTIONS_STRICT { \
+ .strict = 1, \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 04/10] fsck: add refs-related error report function
2024-07-29 13:26 ` [GSoC][PATCH v13 04/10] fsck: add refs-related error report function shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 15:09 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 2439 bytes --]
On Mon, Jul 29, 2024 at 09:26:51PM +0800, shejialuo wrote:
> Add refs-related options to the "fsck_options", create refs-specific
> "error_func" callback "fsck_refs_error_function".
We should have an explanation _why_ we are adding these functions in the
commit message.
> "fsck_refs_error_function" will use the "oid" parameter. When the caller
> passes the oid, it will use "oid_to_hex" to get the corresponding hex
> value to report to the caller.
>
> Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
> macros to create refs options easily.
It is a bit unclear to me what you mean with "create refs options
easily". Do you mean to say that `git refs check` (or whatever this will
be called) will have flags like "--strict"?
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> fsck.c | 25 +++++++++++++++++++++++++
> fsck.h | 14 ++++++++++++++
> 2 files changed, 39 insertions(+)
>
> diff --git a/fsck.c b/fsck.c
> index af61fa90ba..56de29b4c0 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -1251,6 +1251,31 @@ int fsck_objects_error_function(struct fsck_options *o,
> return 1;
> }
>
> +int fsck_refs_error_function(struct fsck_options *options,
> + const struct object_id *oid,
> + enum object_type object_type UNUSED,
> + const struct fsck_refs_info *refs_info,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> +{
> + struct strbuf sb = STRBUF_INIT;
> + int ret = 0;
> +
> + strbuf_addstr(&sb, refs_info->path);
> +
> + if (oid)
> + strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
Okay, so we do end up printing the object ID indeed. But wouldn't we
want to potentially do the same with symbolic refs?
Also, would it make more sense to put the `oid` (and potentially the
`referent` when we also handle symbolic refs) into `struct
fsck_refs_info`? Like this, the whole state would be self-contained in
that structure, which would also make my proposal from a preceding
commit more feasible where the subsystem-specific error functions only
get a void pointer to this structure. It would require another
refactoring on top to move the object type and OID into a `struct
fsck_objects_info`, too, but that shouldn't be too involved, I guess.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 04/10] fsck: add refs-related error report function
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 15:09 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 15:09 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Tue, Jul 30, 2024 at 10:31:26AM +0200, Patrick Steinhardt wrote:
> On Mon, Jul 29, 2024 at 09:26:51PM +0800, shejialuo wrote:
> > Add refs-related options to the "fsck_options", create refs-specific
> > "error_func" callback "fsck_refs_error_function".
>
> We should have an explanation _why_ we are adding these functions in the
> commit message.
>
Yes, I will improve this in the next version.
> > "fsck_refs_error_function" will use the "oid" parameter. When the caller
> > passes the oid, it will use "oid_to_hex" to get the corresponding hex
> > value to report to the caller.
> >
> > Last, add "FSCK_REFS_OPTIONS_DEFAULT" and "FSCK_REFS_OPTIONS_STRICT"
> > macros to create refs options easily.
>
> It is a bit unclear to me what you mean with "create refs options
> easily". Do you mean to say that `git refs check` (or whatever this will
> be called) will have flags like "--strict"?
>
Yes, when the user passes `--strict`, every message of the warn type
will be treated as an error. That is why I created
"FSCK_REFS_OPTIONS_STRICT". However, I didn't think much about it here;
I just followed what the codebase does for objects.
> > Mentored-by: Patrick Steinhardt <ps@pks.im>
> > Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> > Signed-off-by: shejialuo <shejialuo@gmail.com>
> > ---
> > fsck.c | 25 +++++++++++++++++++++++++
> > fsck.h | 14 ++++++++++++++
> > 2 files changed, 39 insertions(+)
> >
> > diff --git a/fsck.c b/fsck.c
> > index af61fa90ba..56de29b4c0 100644
> > --- a/fsck.c
> > +++ b/fsck.c
> > @@ -1251,6 +1251,31 @@ int fsck_objects_error_function(struct fsck_options *o,
> > return 1;
> > }
> >
> > +int fsck_refs_error_function(struct fsck_options *options,
> > + const struct object_id *oid,
> > + enum object_type object_type UNUSED,
> > + const struct fsck_refs_info *refs_info,
> > + enum fsck_msg_type msg_type,
> > + enum fsck_msg_id msg_id UNUSED,
> > + const char *message)
> > +{
> > + struct strbuf sb = STRBUF_INIT;
> > + int ret = 0;
> > +
> > + strbuf_addstr(&sb, refs_info->path);
> > +
> > + if (oid)
> > + strbuf_addf(&sb, " -> (%s)", oid_to_hex(oid));
>
> Okay, so we do end up printing the object ID indeed. But wouldn't we
> want to potentially do the same with symbolic refs?
>
> Also, would it make more sense to put the `oid` (and potentially the
> `referent` when we also handle symbolic refs) into `struct
> fsck_refs_info`? Like this, the whole state would be self-contained in
> that structure, which would also make my proposal from a preceding
> commit more feasible where the subsystem-specific error functions only
> get a void pointer to this structure. It would require another
> refactoring on top to move the object type and OID into a `struct
> fsck_objects_info`, too, but that shouldn't be too involved, I guess.
>
Yes, I totally agree here. I didn't consider symrefs here. Actually, we
should provide a "symref -> referent" message if we provide the
"regular ref -> oid" message.
I think we should do this. As I have commented on "[Patch v13][2/10]",
refactoring this part is a necessity.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 05/10] refs: set up ref consistency check infrastructure
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (3 preceding siblings ...)
2024-07-29 13:26 ` [GSoC][PATCH v13 04/10] fsck: add refs-related error report function shejialuo
@ 2024-07-29 13:27 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:27 ` [GSoC][PATCH v13 06/10] git refs: add verify subcommand shejialuo
` (6 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The interfaces defined in `ref_storage_be` are carefully grouped by
semantics. They are organized into the following five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". Semantically, it does not fit
into any of the five categories above, so explicitly add a blank line to
set it apart from the others.
Last, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 13 ++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index bb90a18875..410919246b 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index 0ecba21b4a..804d6a7fce 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..4630eb1f80 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,15 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3443,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 05/10] refs: set up ref consistency check infrastructure
2024-07-29 13:27 ` [GSoC][PATCH v13 05/10] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 15:10 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 1063 bytes --]
On Mon, Jul 29, 2024 at 09:27:02PM +0800, shejialuo wrote:
> The interfaces defined in the `ref_storage_be` are carefully structured
> in semantic. It's organized as the five parts:
>
> 1. The name and the initialization interfaces.
> 2. The ref transaction interfaces.
> 3. The ref internal interfaces (pack, rename and copy).
> 4. The ref filesystem interfaces.
> 5. The reflog related interfaces.
>
> To keep consistent with the git-fsck(1), add a new interface named
> "fsck_refs_fn" to the end of "ref_storage_be". This semantic cannot be
> grouped into any above five categories. Explicitly add blank line to
> make it different from others.
>
> Last, implement placeholder functions for each ref backends.
You're carefully explaining what you are doing and where you are placing
the new callback functions. But you never explain why you add those
functions in the first place, which I would think is much more important
than explaining the placement of the new callbacks.
Other than that this patch looks good to me.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 05/10] refs: set up ref consistency check infrastructure
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 15:10 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 15:10 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Tue, Jul 30, 2024 at 10:31:32AM +0200, Patrick Steinhardt wrote:
> On Mon, Jul 29, 2024 at 09:27:02PM +0800, shejialuo wrote:
> > The interfaces defined in the `ref_storage_be` are carefully structured
> > in semantic. It's organized as the five parts:
> >
> > 1. The name and the initialization interfaces.
> > 2. The ref transaction interfaces.
> > 3. The ref internal interfaces (pack, rename and copy).
> > 4. The ref filesystem interfaces.
> > 5. The reflog related interfaces.
> >
> > To keep consistent with the git-fsck(1), add a new interface named
> > "fsck_refs_fn" to the end of "ref_storage_be". This semantic cannot be
> > grouped into any above five categories. Explicitly add blank line to
> > make it different from others.
> >
> > Last, implement placeholder functions for each ref backends.
>
> You're carefully explaining what you are doing and where you are placing
> the new callback functions. But you never explain why you add those
> functions in the first place, which I would think is much more important
> than explaining the placement of the new callbacks.
>
Thanks, I will improve this in the next version.
> Other than that this patch looks good to me.
>
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 06/10] git refs: add verify subcommand
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (4 preceding siblings ...)
2024-07-29 13:27 ` [GSoC][PATCH v13 05/10] refs: set up ref consistency check infrastructure shejialuo
@ 2024-07-29 13:27 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:27 ` [GSoC][PATCH v13 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
` (5 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency and also this subcommand will
be used as the entry point of checking refs for "git-fsck(1)". Last, add
"verbose" field into "fsck_options" to indicate whether we should print
verbose messages when checking refs and objects consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++
builtin/refs.c | 44 ++++++++++++++++++++++++++++++++++++++
fsck.h | 1 +
3 files changed, 58 insertions(+)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..1244a85b64 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable more strict checking, every WARN severity for the `Fsck Messages`
+ be seen as ERROR. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..4831c9e28e 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
+ OPT__VERBOSE(&verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
+ if (verbose)
+ fsck_refs_options.verbose = 1;
+ if (strict)
+ fsck_refs_options.strict = 1;
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ /*
+ * Explicitly free the allocated array and "skip_oids" set
+ */
+ free(fsck_refs_options.msg_type);
+ oidset_clear(&fsck_refs_options.skip_oids);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.h b/fsck.h
index a4a4ba88ee..b03dba442e 100644
--- a/fsck.h
+++ b/fsck.h
@@ -155,6 +155,7 @@ struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
unsigned strict:1;
+ unsigned verbose:1;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 06/10] git refs: add verify subcommand
2024-07-29 13:27 ` [GSoC][PATCH v13 06/10] git refs: add verify subcommand shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 15:59 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 4790 bytes --]
On Mon, Jul 29, 2024 at 09:27:12PM +0800, shejialuo wrote:
The subject should probably start with "builtin/refs", not "git refs".
> Introduce a new subcommand "verify" in git-refs(1) to allow the user to
> check the reference database consistency and also this subcommand will
> be used as the entry point of checking refs for "git-fsck(1)". Last, add
> "verbose" field into "fsck_options" to indicate whether we should print
> verbose messages when checking refs and objects consistency.
Nice. I very much like that we now have a common home for such low-level
ref-related commands. Also, "verify" is neatly in line with e.g. `git
commit-graph verify`.
> @@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
> can be used to double check that the migration works as expected before
> performing the actual migration.
>
> +The following options are specific to 'git refs verify':
> +
> +--strict::
> + Enable more strict checking, every WARN severity for the `Fsck Messages`
> + be seen as ERROR. See linkgit:git-fsck[1].
How about:
"Enable stricter error checking. This will cause warnings to be
reported as errors. See linkgit:git-fsck[1]."
> +--verbose::
> + When verifying the reference database consistency, be chatty.
I wonder whether this really helps all that much. It doesn't really say
what it adds on top of the default mode. So unless we document what
exactly this changes, I rather think we can just leave it out, as
basically everyone knows what a "--verbose" flag does.
> KNOWN LIMITATIONS
> -----------------
>
> diff --git a/builtin/refs.c b/builtin/refs.c
> index 46dcd150d4..4831c9e28e 100644
> --- a/builtin/refs.c
> +++ b/builtin/refs.c
> @@ -1,4 +1,6 @@
> #include "builtin.h"
> +#include "config.h"
> +#include "fsck.h"
> #include "parse-options.h"
> #include "refs.h"
> #include "repository.h"
> @@ -7,6 +9,9 @@
> #define REFS_MIGRATE_USAGE \
> N_("git refs migrate --ref-format=<format> [--dry-run]")
>
> +#define REFS_VERIFY_USAGE \
> + N_("git refs verify [--strict] [--verbose]")
> +
> static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
> {
> const char * const migrate_usage[] = {
> @@ -58,15 +63,54 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
> return err;
> }
>
> +static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
> +{
> + struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
So we don't ever end up using `FSCK_REFS_OPTIONS_STRICT`? If so, I think
we should just drop that declaration in the preceding patch.
> + const char * const verify_usage[] = {
> + REFS_VERIFY_USAGE,
> + NULL,
> + };
> + unsigned int verbose = 0, strict = 0;
> + struct option options[] = {
> + OPT__VERBOSE(&verbose, N_("be verbose")),
> + OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
> + OPT_END(),
> + };
> + int ret;
> +
> + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> + if (argc)
> + usage(_("'git refs verify' takes no arguments"));
> +
> + if (verbose)
> + fsck_refs_options.verbose = 1;
> + if (strict)
> + fsck_refs_options.strict = 1;
Instead of manually setting those variables, we can pass pointers to
those member variables in the `struct option`s directly.
> + git_config(git_fsck_config, &fsck_refs_options);
> + prepare_repo_settings(the_repository);
> +
> + ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
> +
> + /*
> + * Explicitly free the allocated array and "skip_oids" set
> + */
> + free(fsck_refs_options.msg_type);
> + oidset_clear(&fsck_refs_options.skip_oids);
Should we provide a `fsck_options_clear()` function that does this for
us? Otherwise we'll have to adapt callsites of `refs_fsck` whenever
internal implementation details of the subsystem add newly allocated
members.
> + return ret;
> +}
> +
> int cmd_refs(int argc, const char **argv, const char *prefix)
> {
> const char * const refs_usage[] = {
> REFS_MIGRATE_USAGE,
> + REFS_VERIFY_USAGE,
> NULL,
> };
> parse_opt_subcommand_fn *fn = NULL;
> struct option opts[] = {
> OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
> + OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
> OPT_END(),
> };
>
> diff --git a/fsck.h b/fsck.h
> index a4a4ba88ee..b03dba442e 100644
> --- a/fsck.h
> +++ b/fsck.h
> @@ -155,6 +155,7 @@ struct fsck_options {
> fsck_walk_func walk;
> fsck_error error_func;
> unsigned strict:1;
> + unsigned verbose:1;
Okay. Let's see whether this field will be used in a subsequent patch.
If not, we should drop it and get rid of the option altogether, I guess.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 06/10] git refs: add verify subcommand
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 15:59 ` shejialuo
2024-07-30 17:56 ` Eric Sunshine
0 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-30 15:59 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Tue, Jul 30, 2024 at 10:31:37AM +0200, Patrick Steinhardt wrote:
> On Mon, Jul 29, 2024 at 09:27:12PM +0800, shejialuo wrote:
>
> The subject should probably start with "builtin/refs", not "git refs".
>
Yes, I will improve this in the next version.
> > @@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
> > can be used to double check that the migration works as expected before
> > performing the actual migration.
> >
> > +The following options are specific to 'git refs verify':
> > +
> > +--strict::
> > + Enable more strict checking, every WARN severity for the `Fsck Messages`
> > + be seen as ERROR. See linkgit:git-fsck[1].
>
> How about:
>
> "Enable stricter error checking. This will cause warnings to be
> reported as errors. See linkgit:git-fsck[1]."
>
Yes, that is much clearer. Actually, I find it really hard to write
good documentation.
> > +--verbose::
> > + When verifying the reference database consistency, be chatty.
>
> I wonder whether this really helps all that much. It doesn't really say
> what it adds on top of the default mode. So unless we document what
> exactly this changes, I rather think we can just leave it aways as
> basically everyone knows what a "--verbose" flag does.
>
Yes, I think so. `--verbose` is a common flag. However, we have already
added this, so we may just leave it here. It's not bad to add more
information.
> > +static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
> > +{
> > + struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
>
> So we don't ever end up using `FSCK_REFS_OPTIONS_STRICT`? If so, I think
> we should just drop that declaration in the preceding patch.
>
I agree here. I will delete `FSCK_REFS_OPTIONS_STRICT`.
> > + const char * const verify_usage[] = {
> > + REFS_VERIFY_USAGE,
> > + NULL,
> > + };
> > + unsigned int verbose = 0, strict = 0;
> > + struct option options[] = {
> > + OPT__VERBOSE(&verbose, N_("be verbose")),
> > + OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
> > + OPT_END(),
> > + };
> > + int ret;
> > +
> > + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> > + if (argc)
> > + usage(_("'git refs verify' takes no arguments"));
> > +
> > + if (verbose)
> > + fsck_refs_options.verbose = 1;
> > + if (strict)
> > + fsck_refs_options.strict = 1;
>
> Instead of manually setting those variables, we can pass pointers to
> those member variables in the `struct option`s directly.
>
Yes. I tried this but found that the types do not match. I will find a
way to do it.
> > + git_config(git_fsck_config, &fsck_refs_options);
> > + prepare_repo_settings(the_repository);
> > +
> > + ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
> > +
> > + /*
> > + * Explicitly free the allocated array and "skip_oids" set
> > + */
> > + free(fsck_refs_options.msg_type);
> > + oidset_clear(&fsck_refs_options.skip_oids);
>
> Should we provide a `fsck_options_clear()` function that does this for
> us? Otherwise we'll have to adapt callsites of `refs_fsck` whenever
> internal implementation details of the subsystem add newly allocated
> members.
>
Yes, I agree with this, and I want to say a bit more about it. At first,
I did not call `oidset_clear` and the CI tests failed. That confused me:
since we never use "skip_oids" in the ref check, why did the tests say
that "fsck_refs_options.skip_oids" was not freed?
This is because when executing the command "git -c fsck.skipList=.. fsck",
in the subprocess `git refs verify`, the code will still setup the
"skip_oids" by the config. So we should explicitly free the "skip_oids".
But how does "fsck.c" free "skip_oids"? Actually, "fsck.c" never frees
"skip_oids". This is because "git-fsck(1)" defines the following:
static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;
Because these two options are "static", so there is no memory leak. We
leave it to the operating system. So maybe a more simple way is just to
add "static" identifier in "cmd_refs_verify" which means:
- struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ static struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
But I don't think we should use `static`, because Eric has told me that
making a variable "static" will make the code harder to "libfy". So
let's use "fsck_options_clear" function instead.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 06/10] git refs: add verify subcommand
2024-07-30 15:59 ` shejialuo
@ 2024-07-30 17:56 ` Eric Sunshine
0 siblings, 0 replies; 282+ messages in thread
From: Eric Sunshine @ 2024-07-30 17:56 UTC (permalink / raw)
To: shejialuo
Cc: Patrick Steinhardt, git, Karthik Nayak, Junio C Hamano,
Justin Tobler
On Tue, Jul 30, 2024 at 11:59 AM shejialuo <shejialuo@gmail.com> wrote:
> On Tue, Jul 30, 2024 at 10:31:37AM +0200, Patrick Steinhardt wrote:
> > On Mon, Jul 29, 2024 at 09:27:12PM +0800, shejialuo wrote:
> > > + /*
> > > + * Explicitly free the allocated array and "skip_oids" set
> > > + */
> > > + free(fsck_refs_options.msg_type);
> > > + oidset_clear(&fsck_refs_options.skip_oids);
> >
> > Should we provide a `fsck_options_clear()` function that does this for
> > us? Otherwise we'll have to adapt callsites of `refs_fsck` whenever
> > internal implementation details of the subsystem add newly allocated
> > members.
> [...]
> But how does "fsck.c" free "skip_oids", actually "fsck.c" never frees
> "skip_oids". This is because "git-fsck(1)" defines the following:
>
> static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
> static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;
>
> Because these two options are "static", so there is no memory leak. We
> leave it to the operating system. So maybe a more simple way is just to
> add "static" identifier in "cmd_refs_verify" which means:
>
> - struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
> + static struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
>
> But I don't think we should use `static`, because Eric has told me that
> making a variable "static" will make the code harder to "libfy". So
> let's use "fsck_options_clear" function instead.
I haven't been following this topic closely and I'm not familiar with
this code (and don't have much time now to dig into it), but I suspect
the context here is rather different from the one[*] in which I was
highly skeptical of the use of `static`. The `static` in that earlier
case was suspicious/questionable for two reasons. First, it was a case
of premature optimization (which, by definition, is frowned upon).
Second, it was in a "library" function (namely, top-level
fsck.c:fsck_refs_error_function()) which may someday become a linkable
library which other programs (aside from `git` itself) may utilize.
Having a static strbuf in the library function makes the function
non-reentrant and takes memory management out of the hands of the
client.
In the case under discussion here (namely `builtin/fsck.c`), it is a
Git-specific command, not library code. As such, "libification" is
much less of an issue since Git-specific command code is less likely
to be reused by some other project. (However, that's not to say that
we shouldn't worry about unnecessary use of `static` even in builtin
commands; code from those commands does periodically migrate from
`builtin/*.c` to top-level library oriented `*.c`.)
So, considering that the variable under discussion:
struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
is part of a builtin command rather than library code, we don't have
to worry about "libification" so much, thus making it `static` would
be a workable approach. However, doing so merely to avoid complaint by
the leak-checker does not seem like good justification. Hence, keeping
this variable non-static and freeing it explicitly seems a better idea
(which is what this code does presently).
I do agree with Patrick that adding fsck_options_clear() to top-level
`fsck.h` would be sensible since it frees callers from having to know
implementation details of `fsck_options`.
By the way, regarding the static `fsck_walk_options` and
`fsck_obj_options` those are probably global static for convenience
rather than out of necessity. It might very well be possible to make
those local variables in builtin/fsck.c:cmd_fsck() and then plumb them
through to called functions so that they don't have to be static, and
then they would be freed manually by cmd_fsck(), as well. However,
that sort of change is well outside the scope of this topic.
[*] https://lore.kernel.org/git/CAPig+cR=RgMeaAy1PRGgHu6_Ak+7=_-5tGvBZRekKRxi7GtdHw@mail.gmail.com/
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 07/10] builtin/fsck: add `git-refs verify` child process
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (5 preceding siblings ...)
2024-07-29 13:27 ` [GSoC][PATCH v13 06/10] git refs: add verify subcommand shejialuo
@ 2024-07-29 13:27 ` shejialuo
2024-07-29 13:27 ` [GSoC][PATCH v13 08/10] files-backend: add unified interface for refs scanning shejialuo
` (4 subsequent siblings)
11 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git-refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 8553e2bc1b..7ca1628ba5 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -897,6 +897,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1066,6 +1081,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 08/10] files-backend: add unified interface for refs scanning
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (6 preceding siblings ...)
2024-07-29 13:27 ` [GSoC][PATCH v13 07/10] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-07-29 13:27 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:27 ` [GSoC][PATCH v13 09/10] fsck: add ref name check for files backend shejialuo
` (3 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan its corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs/files-backend.c | 74 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 73 insertions(+), 1 deletion(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 4630eb1f80..cb184953c1 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,13 +3409,84 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fns)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+
+ if (!iter) {
+ ret = error_errno("cannot open directory %s", sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ ret = error(_("unexpected file type for '%s'"),
+ iter->basename);
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ files_fsck_refs_fn fsck_refs_fns[]= {
+ NULL
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+
+ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
+
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
+ files_fsck_refs(ref_store, o);
}
struct ref_storage_be refs_be_files = {
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 08/10] files-backend: add unified interface for refs scanning
2024-07-29 13:27 ` [GSoC][PATCH v13 08/10] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 16:10 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 4429 bytes --]
On Mon, Jul 29, 2024 at 09:27:31PM +0800, shejialuo wrote:
> For refs and reflogs, we need to scan its corresponding directories to
> check every regular file or symbolic link which shares the same pattern.
> Introduce a unified interface for scanning directories for
> files-backend.
>
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> refs/files-backend.c | 74 +++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index 4630eb1f80..cb184953c1 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -6,6 +6,7 @@
> #include "../gettext.h"
> #include "../hash.h"
> #include "../hex.h"
> +#include "../fsck.h"
> #include "../refs.h"
> #include "refs-internal.h"
> #include "ref-cache.h"
> @@ -3408,13 +3409,84 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
> return ret;
> }
>
> +/*
> + * For refs and reflogs, they share a unified interface when scanning
> + * the whole directory. This function is used as the callback for each
> + * regular file or symlink in the directory.
> + */
> +typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
> + const char *gitdir,
> + const char *refs_check_dir,
> + struct dir_iterator *iter);
> +
> +static int files_fsck_refs_dir(struct ref_store *ref_store,
> + struct fsck_options *o,
> + const char *refs_check_dir,
> + files_fsck_refs_fn *fsck_refs_fns)
> +{
> + const char *gitdir = ref_store->gitdir;
> + struct strbuf sb = STRBUF_INIT;
> + struct dir_iterator *iter;
> + int iter_status;
> + int ret = 0;
> +
> + strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
> +
> + iter = dir_iterator_begin(sb.buf, 0);
> +
> + if (!iter) {
> + ret = error_errno("cannot open directory %s", sb.buf);
> + goto out;
> + }
The error message should probably be marked as translatable. Also, I'd
personally remove the newline between `iter = ...` and the error check
as those are a logical unit.
> + while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
> + if (S_ISDIR(iter->st.st_mode)) {
> + continue;
> + } else if (S_ISREG(iter->st.st_mode) ||
> + S_ISLNK(iter->st.st_mode)) {
> + if (o->verbose)
> + fprintf_ln(stderr, "Checking %s/%s",
> + refs_check_dir, iter->relative_path);
Okay, we do end up using the `verbose` flag :)
> + for (size_t i = 0; fsck_refs_fns[i]; i++) {
> + if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
> + ret = -1;
> + }
> + } else {
> + ret = error(_("unexpected file type for '%s'"),
> + iter->basename);
Instead of printing this as an error directly, shouldn't we report it
via the `fsck_refs_report` interface?
> + }
> + }
Okay. It does make sense to do our own directory walk as that will allow
us to check files which would otherwise not be reported by the normal
refs interfaces.
> + if (iter_status != ITER_DONE)
> + ret = error(_("failed to iterate over '%s'"), sb.buf);
Reporting this as an error feels sensible though as we have no ref to
tie this error to, and it feels like a generic error.
> +out:
> + strbuf_release(&sb);
> + return ret;
> +}
> +
> +static int files_fsck_refs(struct ref_store *ref_store,
> + struct fsck_options *o)
> +{
> + files_fsck_refs_fn fsck_refs_fns[]= {
> + NULL
The last member should also end with a comma.
> + };
> +
> + if (o->verbose)
> + fprintf_ln(stderr, "Checking references consistency");
> +
> + return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
> +
This newline should be removed.
> +}
> +
> static int files_fsck(struct ref_store *ref_store,
> struct fsck_options *o)
> {
> struct files_ref_store *refs =
> files_downcast(ref_store, REF_STORE_READ, "fsck");
>
> - return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
> + return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
> + files_fsck_refs(ref_store, o);
I'd think we should first check loose files and then continue to check
the packed ref store. That's really only a gut feeling though, and I
cannot exactly say why that feels more natural.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 08/10] files-backend: add unified interface for refs scanning
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 16:10 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 16:10 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
> > + iter = dir_iterator_begin(sb.buf, 0);
> > +
> > + if (!iter) {
> > + ret = error_errno("cannot open directory %s", sb.buf);
> > + goto out;
> > + }
>
> The error message should probably be marked as translatable. Also, I'd
> personally remove the newline between `iter = ...` and the error check
> as those are a logical unit.
>
Yes, I will improve this in the next version.
> > + for (size_t i = 0; fsck_refs_fns[i]; i++) {
> > + if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
> > + ret = -1;
> > + }
> > + } else {
> > + ret = error(_("unexpected file type for '%s'"),
> > + iter->basename);
>
> Instead of printing this as an error directly, shouldn't we report it
> via the `fsck_refs_report` interface?
>
Yes, exactly; we should use this interface. I accidentally overlooked
this. Thanks.
> > +out:
> > + strbuf_release(&sb);
> > + return ret;
> > +}
> > +
> > +static int files_fsck_refs(struct ref_store *ref_store,
> > + struct fsck_options *o)
> > +{
> > + files_fsck_refs_fn fsck_refs_fns[]= {
> > + NULL
>
> The last member should also end with a comma.
>
I will improve this in the next version.
> > + };
> > +
> > + if (o->verbose)
> > + fprintf_ln(stderr, "Checking references consistency");
> > +
> > + return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
> > +
>
> This newline should be removed.
>
OK.
> > +}
> > +
> > static int files_fsck(struct ref_store *ref_store,
> > struct fsck_options *o)
> > {
> > struct files_ref_store *refs =
> > files_downcast(ref_store, REF_STORE_READ, "fsck");
> >
> > - return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
> > + return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
> > + files_fsck_refs(ref_store, o);
>
> I'd think we should first check loose files and then continue to check
> the packed ref store. That's really only a gut feeling though, and I
> cannot exactly say why that feels more natural.
>
It would feel more natural, because the packed-refs entries refer to
the loose refs, so we should check the loose refs first. For example, if
a regular ref is bad, we will report the problem first. Now suppose
packed-refs has recorded this regular ref. When checking packed-refs, we
would not need to check the regular ref itself. We only need to check
one thing: whether the pointee exists under the "refs/" directory.
And we do not need to check the regular ref again because we have
already checked it in the loose refs part.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 09/10] fsck: add ref name check for files backend
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (7 preceding siblings ...)
2024-07-29 13:27 ` [GSoC][PATCH v13 08/10] files-backend: add unified interface for refs scanning shejialuo
@ 2024-07-29 13:27 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-29 13:27 ` [GSoC][PATCH v13 10/10] fsck: add ref content " shejialuo
` (2 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully
check refs with badly formatted names, such as a standalone "@" or a
name ending with ".lock".
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new unit test to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 +
fsck.h | 1 +
refs/files-backend.c | 22 ++++++++
t/t0602-reffiles-fsck.sh | 101 ++++++++++++++++++++++++++++++++++
4 files changed, 127 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..d8e437a043 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefName`::
+ (ERROR) A ref has an invalid format.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index b03dba442e..ce56ce4bef 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index cb184953c1..0d4fc27768 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,27 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ struct fsck_refs_info info;
+ int ret = 0;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ info.path = sb.buf;
+ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3469,6 +3490,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
struct fsck_options *o)
{
files_fsck_refs_fn fsck_refs_fns[]= {
+ files_fsck_refs_name,
NULL
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..b2db58d2c6
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+ )
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 09/10] fsck: add ref name check for files backend
2024-07-29 13:27 ` [GSoC][PATCH v13 09/10] fsck: add ref name check for files backend shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 16:14 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 3837 bytes --]
On Mon, Jul 29, 2024 at 09:27:45PM +0800, shejialuo wrote:
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index cb184953c1..0d4fc27768 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -3419,6 +3419,27 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
> const char *refs_check_dir,
> struct dir_iterator *iter);
>
> +static int files_fsck_refs_name(struct fsck_options *o,
> + const char *gitdir UNUSED,
> + const char *refs_check_dir,
> + struct dir_iterator *iter)
> +{
> + struct strbuf sb = STRBUF_INIT;
> + struct fsck_refs_info info;
> + int ret = 0;
> +
> + if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
> + strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
> + info.path = sb.buf;
> + ret = fsck_refs_report(o, NULL, &info,
> + FSCK_MSG_BAD_REF_NAME,
> + "invalid refname format");
> + }
> +
> + strbuf_release(&sb);
> + return ret;
> +}
> +
> static int files_fsck_refs_dir(struct ref_store *ref_store,
> struct fsck_options *o,
> const char *refs_check_dir,
> @@ -3469,6 +3490,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
> struct fsck_options *o)
> {
> files_fsck_refs_fn fsck_refs_fns[]= {
> + files_fsck_refs_name,
> NULL
Neat. I very much like that we can simply add new checks to this
function and the rest is handled for us already. Makes this whole thing
nicely extensible.
> };
>
> diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
> new file mode 100755
> index 0000000000..b2db58d2c6
> --- /dev/null
> +++ b/t/t0602-reffiles-fsck.sh
> @@ -0,0 +1,101 @@
> +#!/bin/sh
> +
> +test_description='Test reffiles backend consistency check'
> +
> +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> +GIT_TEST_DEFAULT_REF_FORMAT=files
> +export GIT_TEST_DEFAULT_REF_FORMAT
> +
> +. ./test-lib.sh
Is this test suite intentionally not marked with
`TEST_PASSES_SANITIZE_LEAK=true`?
> +
> +test_expect_success 'ref name should be checked' '
> + test_when_finished "rm -rf repo" &&
> + git init repo &&
> + branch_dir_prefix=.git/refs/heads &&
> + tag_dir_prefix=.git/refs/tags &&
> + (
> + cd repo &&
> + git commit --allow-empty -m initial &&
> + git checkout -b branch-1 &&
> + git tag tag-1 &&
> + git commit --allow-empty -m second &&
> + git checkout -b branch-2 &&
> + git tag tag-2 &&
> + git tag multi_hierarchy/tag-2
> + ) &&
I don't quite get why you create several subshells only to cd into
`repo` in each of them. Isn't a single subshell sufficient for all of
those tests? If you want to delimit blocks, then you can simply add an
empty newline between them.
> + (
> + cd repo &&
> + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
> + test_must_fail git fsck 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/heads/.branch-1: badRefName: invalid refname format
> + EOF
> + rm $branch_dir_prefix/.branch-1 &&
> + test_cmp expect err
> + ) &&
> + (
> + cd repo &&
> + cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
> + test_must_fail git fsck 2>err &&
> + cat >expect <<-EOF &&
> + error: refs/tags/tag-1.lock: badRefName: invalid refname format
> + EOF
> + rm $tag_dir_prefix/tag-1.lock &&
> + test_cmp expect err
> + ) &&
The other cases all make sense, but I don't think that a file ending
with ".lock" should be marked as having a "badRefName". It is expected
that concurrent writers may have such lock files.
What could make sense is to eventually mark stale lock files older than
X amount of time as errors or warnings. But I'd think that this is
outside of the scope of this patch series.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 09/10] fsck: add ref name check for files backend
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 16:14 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 16:14 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
> > diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
> > new file mode 100755
> > index 0000000000..b2db58d2c6
> > --- /dev/null
> > +++ b/t/t0602-reffiles-fsck.sh
> > @@ -0,0 +1,101 @@
> > +#!/bin/sh
> > +
> > +test_description='Test reffiles backend consistency check'
> > +
> > +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> > +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> > +GIT_TEST_DEFAULT_REF_FORMAT=files
> > +export GIT_TEST_DEFAULT_REF_FORMAT
> > +
> > +. ./test-lib.sh
>
> Is this test suite intentionally not marked with
> `TEST_PASSES_SANITIZE_LEAK=true`?
>
No, I did not know about this. I will add `TEST_PASSES_SANITIZE_LEAK=true`
and export this environment variable.
> > +
> > +test_expect_success 'ref name should be checked' '
> > + test_when_finished "rm -rf repo" &&
> > + git init repo &&
> > + branch_dir_prefix=.git/refs/heads &&
> > + tag_dir_prefix=.git/refs/tags &&
> > + (
> > + cd repo &&
> > + git commit --allow-empty -m initial &&
> > + git checkout -b branch-1 &&
> > + git tag tag-1 &&
> > + git commit --allow-empty -m second &&
> > + git checkout -b branch-2 &&
> > + git tag tag-2 &&
> > + git tag multi_hierarchy/tag-2
> > + ) &&
>
> I don't quite get why you create several subshells only to cd into
> `repo` in each of them. Isn't a single subshell sufficient for all of
> those tests? If you want to delimit blocks, then you can simply add an
> empty newline between them.
>
I just wanted to delimit the blocks; I will use empty lines in the next
version.
> > + (
> > + cd repo &&
> > + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
> > + test_must_fail git fsck 2>err &&
> > + cat >expect <<-EOF &&
> > + error: refs/heads/.branch-1: badRefName: invalid refname format
> > + EOF
> > + rm $branch_dir_prefix/.branch-1 &&
> > + test_cmp expect err
> > + ) &&
> > + (
> > + cd repo &&
> > + cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
> > + test_must_fail git fsck 2>err &&
> > + cat >expect <<-EOF &&
> > + error: refs/tags/tag-1.lock: badRefName: invalid refname format
> > + EOF
> > + rm $tag_dir_prefix/tag-1.lock &&
> > + test_cmp expect err
> > + ) &&
>
> The other cases all make sense, but I don't think that a file ending
> with ".lock" should be marked as having a "badRefName". It is expected
> that concurrent writers may have such lock files.
>
> What could make sense is to eventually mark stale lock files older than
> X amount of time as errors or warnings. But I'd think that this is
> outside of the scope of this patch series.
>
If so, let us just ignore the ".lock" situation for now.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v13 10/10] fsck: add ref content check for files backend
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (8 preceding siblings ...)
2024-07-29 13:27 ` [GSoC][PATCH v13 09/10] fsck: add ref name check for files backend shejialuo
@ 2024-07-29 13:27 ` shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 22:06 ` Junio C Hamano
2024-07-30 8:31 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup Patrick Steinhardt
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
11 siblings, 2 replies; 282+ messages in thread
From: shejialuo @ 2024-07-29 13:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Enhance the git-fsck(1) command by adding a check for reference content
in the files backend. The new functionality ensures that symrefs, real
symbolic link and regular refs are validated correctly.
In order to check the trailing content of the regular refs, add a new
parameter `trailing` to `parse_loose_ref_contents`.
For symrefs, `parse_loose_ref_contents` will set the "referent".
However, symbolic link could be either absolute or relative. Use
"strbuf_add_real_path" to read the symbolic link and convert the
relative path to absolute path. Then use "skip_prefix" to make it align
with symref "referent".
Thus, the symrefs and symbolic links could share the same interface. Add
a new function "files_fsck_symref_target" which aims at checking the
following things:
1. whether the pointee is under the `refs/` directory.
2. whether the pointee name is correct.
3. whether the pointee path is a wrong type in filesystem.
Last, add the following FSCK MESSAGEs:
1. "badRefContent(ERROR)": A ref has a bad content
2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
3. "trailingRefContent(WARN)": A ref content has trailing contents.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 9 +++
fsck.h | 3 +
refs.c | 2 +-
refs/files-backend.c | 144 +++++++++++++++++++++++++++++++++-
refs/refs-internal.h | 5 +-
t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++
6 files changed, 268 insertions(+), 5 deletions(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index d8e437a043..8fe24a960e 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,9 +19,15 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefContent`::
+ (ERROR) A ref has a bad content.
+
`badRefName`::
(ERROR) A ref has an invalid format.
+`badSymrefPointee`::
+ (ERROR) The pointee of a symref is bad.
+
`badTagName`::
(INFO) A tag has an invalid format.
@@ -167,6 +173,9 @@
`nullSha1`::
(WARN) Tree contains entries pointing to a null sha1.
+`trailingRefContent`::
+ (WARN) A ref content has trailing contents.
+
`treeNotSorted`::
(ERROR) A tree is not properly sorted.
diff --git a/fsck.h b/fsck.h
index ce56ce4bef..710b3513d0 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,8 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_NAME, ERROR) \
+ FUNC(BAD_REF_CONTENT, ERROR) \
+ FUNC(BAD_SYMREF_POINTEE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
@@ -72,6 +74,7 @@ enum fsck_msg_type {
FUNC(HAS_DOTDOT, WARN) \
FUNC(HAS_DOTGIT, WARN) \
FUNC(NULL_SHA1, WARN) \
+ FUNC(TRAILING_REF_CONTENT, WARN) \
FUNC(ZERO_PADDED_FILEMODE, WARN) \
FUNC(NUL_IN_COMMIT, WARN) \
FUNC(LARGE_PATHNAME, WARN) \
diff --git a/refs.c b/refs.c
index 410919246b..eb82fb7d4e 100644
--- a/refs.c
+++ b/refs.c
@@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
}
result = parse_loose_ref_contents(content.buf, oid, referent, type,
- failure_errno);
+ failure_errno, NULL);
done:
strbuf_release(&full_path);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 0d4fc27768..131eec7307 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1,6 +1,7 @@
#define USE_THE_REPOSITORY_VARIABLE
#include "../git-compat-util.h"
+#include "../abspath.h"
#include "../copy.h"
#include "../environment.h"
#include "../gettext.h"
@@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname,
strbuf_rtrim(&sb_contents);
buf = sb_contents.buf;
- ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr);
+ ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL);
out:
if (ret && !myerr)
@@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno)
+ int *failure_errno, const char **trailing)
{
const char *p;
if (skip_prefix(buf, "ref:", &buf)) {
@@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid,
*failure_errno = EINVAL;
return -1;
}
+
+ if (trailing)
+ *trailing = p;
+
return 0;
}
@@ -3440,6 +3445,140 @@ static int files_fsck_refs_name(struct fsck_options *o,
return ret;
}
+/*
+ * Check the symref "pointee_name" and "pointee_path". The caller should
+ * make sure that "pointee_path" is absolute. For a symbolic ref, "pointee_name"
+ * would be the content after "ref:". For a symbolic link, "pointee_name" would
+ * be the path relative to "gitdir".
+ */
+static int files_fsck_symref_target(struct fsck_options *o,
+ struct fsck_refs_info *info,
+ const char *refname,
+ const char *pointee_name,
+ const char *pointee_path)
+{
+ const char *p = NULL;
+ struct stat st;
+ int ret = 0;
+
+ if (!skip_prefix(pointee_name, "refs/", &p)) {
+
+ ret = fsck_refs_report(o, NULL, info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "points to ref outside the refs directory");
+ goto out;
+ }
+
+ if (check_refname_format(pointee_name, 0)) {
+ ret = fsck_refs_report(o, NULL, info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "points to refname with invalid format");
+ }
+
+ if (lstat(pointee_path, &st) < 0)
+ goto out;
+
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) {
+ ret = fsck_refs_report(o, NULL, info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "points to an invalid file type");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int files_fsck_refs_content(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf pointee_path = STRBUF_INIT,
+ ref_content = STRBUF_INIT,
+ abs_gitdir = STRBUF_INIT,
+ referent = STRBUF_INIT,
+ refname = STRBUF_INIT;
+ const char *trailing = NULL;
+ struct fsck_refs_info info;
+ int failure_errno = 0;
+ unsigned int type = 0;
+ struct object_id oid;
+ int ret = 0;
+
+ strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
+ info.path = refname.buf;
+
+ /*
+ * If the file is a symlink, we need to only check the connectivity
+ * of the destination object.
+ */
+ if (S_ISLNK(iter->st.st_mode)) {
+ const char *pointee_name = NULL;
+
+ strbuf_add_real_path(&pointee_path, iter->path.buf);
+
+ strbuf_add_absolute_path(&abs_gitdir, gitdir);
+ strbuf_normalize_path(&abs_gitdir);
+ if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
+ strbuf_addch(&abs_gitdir, '/');
+
+ if (!skip_prefix(pointee_path.buf,
+ abs_gitdir.buf, &pointee_name)) {
+ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_BAD_SYMREF_POINTEE,
+ "point to target outside gitdir");
+ goto clean;
+ }
+
+ ret = files_fsck_symref_target(o, &info, refname.buf,
+ pointee_name, pointee_path.buf);
+ goto clean;
+ }
+
+ if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+ ret = error_errno(_("%s/%s: unable to read the ref"),
+ refs_check_dir, iter->relative_path);
+ goto clean;
+ }
+
+ if (parse_loose_ref_contents(ref_content.buf, &oid,
+ &referent, &type,
+ &failure_errno, &trailing)) {
+ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_BAD_REF_CONTENT,
+ "invalid ref content");
+ goto clean;
+ }
+
+ /*
+ * If the ref is a symref, we need to check the destination name and
+ * connectivity.
+ */
+ if (referent.len && (type & REF_ISSYMREF)) {
+ strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
+ strbuf_rtrim(&referent);
+
+ ret = files_fsck_symref_target(o, &info, refname.buf,
+ referent.buf, pointee_path.buf);
+ goto clean;
+ } else {
+ if (trailing && (*trailing != '\0' && *trailing != '\n')) {
+ ret = fsck_refs_report(o, NULL, &info,
+ FSCK_MSG_TRAILING_REF_CONTENT,
+ "trailing garbage in ref");
+ goto clean;
+ }
+ }
+
+clean:
+ strbuf_release(&abs_gitdir);
+ strbuf_release(&pointee_path);
+ strbuf_release(&refname);
+ strbuf_release(&ref_content);
+ strbuf_release(&referent);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3491,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
{
files_fsck_refs_fn fsck_refs_fns[]= {
files_fsck_refs_name,
+ files_fsck_refs_content,
NULL
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index a905e187cd..2fabf41d14 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -709,11 +709,12 @@ struct ref_store {
/*
* Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for
- * invalid contents.
+ * invalid contents. Also *trailing is set to the first character after the
+ * refname or NULL if the referent is not empty.
*/
int parse_loose_ref_contents(const char *buf, struct object_id *oid,
struct strbuf *referent, unsigned int *type,
- int *failure_errno);
+ int *failure_errno, const char **trailing);
/*
* Fill in the generic part of refs and add it to our collection of
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index b2db58d2c6..29cd824224 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
)
'
+test_expect_success 'regular ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git checkout -b a/b/tag-2
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $branch_dir_prefix/branch-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+ test_must_fail git -c fsck.trailingRefContent=error fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-1-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+ git fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref
+ EOF
+ rm $tag_dir_prefix/tag-2-garbage &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/tag-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $tag_dir_prefix/tag-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content
+ EOF
+ rm $branch_dir_prefix/a/b/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
+test_expect_success 'symbolic ref content should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ (
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: points to refname with invalid format
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: points to an invalid file type
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ ) &&
+ (
+ cd repo &&
+ printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/branch-2-bad: badSymrefPointee: points to ref outside the refs directory
+ EOF
+ rm $branch_dir_prefix/branch-2-bad &&
+ test_cmp expect err
+ )
+'
+
test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 10/10] fsck: add ref content check for files backend
2024-07-29 13:27 ` [GSoC][PATCH v13 10/10] fsck: add ref content " shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 16:25 ` shejialuo
2024-07-30 22:06 ` Junio C Hamano
1 sibling, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 5107 bytes --]
On Mon, Jul 29, 2024 at 09:27:56PM +0800, shejialuo wrote:
> Enhance the git-fsck(1) command by adding a check for reference content
> in the files backend. The new functionality ensures that symrefs, real
> symbolic link and regular refs are validated correctly.
>
> In order to check the trailing content of the regular refs, add a new
> parameter `trailing` to `parse_loose_ref_contents`.
>
> For symrefs, `parse_loose_ref_contents` will set the "referent".
> However, symbolic link could be either absolute or relative. Use
> "strbuf_add_real_path" to read the symbolic link and convert the
> relative path to absolute path. Then use "skip_prefix" to make it align
> with symref "referent".
>
> Thus, the symrefs and symbolic links could share the same interface. Add
> a new function "files_fsck_symref_target" which aims at checking the
> following things:
>
> 1. whether the pointee is under the `refs/` directory.
> 2. whether the pointee name is correct.
> 3. whether the pointee path is a wrong type in filesystem.
>
> Last, add the following FSCK MESSAGEs:
>
> 1. "badRefContent(ERROR)": A ref has a bad content
> 2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
> 3. "trailingRefContent(WARN)": A ref content has trailing contents.
I think it would have been fine to stop at the preceding commit as it
clearly demonstrates how the whole infrastructure is supposed to work.
Additional checks like those you add here would then be a good candidate
for a separate patch series. This would help you get the first patch
series landed faster as you really only have to focus on setting up the
baseline infrastructure.
Feel free to keep or drop this patch as you prefer though, I don't want
to discourage you aiming higher. Just keep in mind that the more you add
on top the longer it takes to land a patch series :)
> diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
> index d8e437a043..8fe24a960e 100644
> --- a/Documentation/fsck-msgids.txt
> +++ b/Documentation/fsck-msgids.txt
> @@ -19,9 +19,15 @@
> `badParentSha1`::
> (ERROR) A commit object has a bad parent sha1.
>
> +`badRefContent`::
> + (ERROR) A ref has a bad content.
> +
s/a bad/bad
> +static int files_fsck_refs_content(struct fsck_options *o,
> + const char *gitdir,
> + const char *refs_check_dir,
> + struct dir_iterator *iter)
> +{
> + struct strbuf pointee_path = STRBUF_INIT,
> + ref_content = STRBUF_INIT,
> + abs_gitdir = STRBUF_INIT,
> + referent = STRBUF_INIT,
> + refname = STRBUF_INIT;
Nit: I think it's more customary to start each of the lines with `struct
strbuf`. Not a 100% certain on this one, though.
> + const char *trailing = NULL;
> + struct fsck_refs_info info;
> + int failure_errno = 0;
> + unsigned int type = 0;
> + struct object_id oid;
> + int ret = 0;
> +
> + strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path);
> + info.path = refname.buf;
> +
> + /*
> + * If the file is a symlink, we need to only check the connectivity
> + * of the destination object.
> + */
> + if (S_ISLNK(iter->st.st_mode)) {
> + const char *pointee_name = NULL;
> +
> + strbuf_add_real_path(&pointee_path, iter->path.buf);
> +
> + strbuf_add_absolute_path(&abs_gitdir, gitdir);
> + strbuf_normalize_path(&abs_gitdir);
> + if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1]))
> + strbuf_addch(&abs_gitdir, '/');
> +
> + if (!skip_prefix(pointee_path.buf,
> + abs_gitdir.buf, &pointee_name)) {
> + ret = fsck_refs_report(o, NULL, &info,
> + FSCK_MSG_BAD_SYMREF_POINTEE,
> + "point to target outside gitdir");
> + goto clean;
> + }
> +
> + ret = files_fsck_symref_target(o, &info, refname.buf,
> + pointee_name, pointee_path.buf);
> + goto clean;
> + }
> +
> + if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
> + ret = error_errno(_("%s/%s: unable to read the ref"),
> + refs_check_dir, iter->relative_path);
> + goto clean;
> + }
> +
> + if (parse_loose_ref_contents(ref_content.buf, &oid,
> + &referent, &type,
> + &failure_errno, &trailing)) {
> + ret = fsck_refs_report(o, NULL, &info,
> + FSCK_MSG_BAD_REF_CONTENT,
> + "invalid ref content");
> + goto clean;
> + }
> +
> + /*
> + * If the ref is a symref, we need to check the destination name and
> + * connectivity.
> + */
> + if (referent.len && (type & REF_ISSYMREF)) {
> + strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
> + strbuf_rtrim(&referent);
> +
> + ret = files_fsck_symref_target(o, &info, refname.buf,
> + referent.buf, pointee_path.buf);
> + goto clean;
> + } else {
> + if (trailing && (*trailing != '\0' && *trailing != '\n')) {
In case the ref ends with a newline, should we check that the next
character is `\0`? Otherwise, it may contain multiple lines, which is
not allowed for a normal ref.
Also, shouldn't the ref always end with a newline?
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 10/10] fsck: add ref content check for files backend
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 16:25 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 16:25 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Tue, Jul 30, 2024 at 10:31:54AM +0200, Patrick Steinhardt wrote:
> On Mon, Jul 29, 2024 at 09:27:56PM +0800, shejialuo wrote:
> > Enhance the git-fsck(1) command by adding a check for reference content
> > in the files backend. The new functionality ensures that symrefs, real
> > symbolic link and regular refs are validated correctly.
> >
> > In order to check the trailing content of the regular refs, add a new
> > parameter `trailing` to `parse_loose_ref_contents`.
> >
> > For symrefs, `parse_loose_ref_contents` will set the "referent".
> > However, symbolic link could be either absolute or relative. Use
> > "strbuf_add_real_path" to read the symbolic link and convert the
> > relative path to absolute path. Then use "skip_prefix" to make it align
> > with symref "referent".
> >
> > Thus, the symrefs and symbolic links could share the same interface. Add
> > a new function "files_fsck_symref_target" which aims at checking the
> > following things:
> >
> > 1. whether the pointee is under the `refs/` directory.
> > 2. whether the pointee name is correct.
> > 3. whether the pointee path is a wrong type in filesystem.
> >
> > Last, add the following FSCK MESSAGEs:
> >
> > 1. "badRefContent(ERROR)": A ref has a bad content
> > 2. "badSymrefPointee(ERROR)": The pointee of a symref is bad.
> > 3. "trailingRefContent(WARN)": A ref content has trailing contents.
>
> I think it would have been fine to stop at the preceding commit as it
> clearly demonstrates how the whole infrastructure is supposed to work.
> Additional checks like those you add here would then be a good candidate
> for a separate patch series. This would help you get the first patch
> series landed faster as you really only have to focus on setting up the
> baseline infrastructure.
>
> Feel free to keep or drop this patch as you prefer though, I don't want
> to discourage you aiming higher. Just keep in mind that the more you add
> on top the longer it takes to land a patch series :)
>
I will drop this patch in the next version. Actually, in the earliest
versions, I didn't realise that setting up the infra would take so much
effort.
> > + /*
> > + * If the ref is a symref, we need to check the destination name and
> > + * connectivity.
> > + */
> > + if (referent.len && (type & REF_ISSYMREF)) {
> > + strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf);
> > + strbuf_rtrim(&referent);
> > +
> > + ret = files_fsck_symref_target(o, &info, refname.buf,
> > + referent.buf, pointee_path.buf);
> > + goto clean;
> > + } else {
> > + if (trailing && (*trailing != '\0' && *trailing != '\n')) {
>
> In case the ref ends with a newline, should we check that the next
> character is `\0`? Otherwise, it may contain multiple lines, which is
> not allowed for a normal ref.
>
> Also, shouldn't the ref always end with a newline?
>
This is a very interesting question here. Based on my experiments, I
have found the following things:
It's OK for regular refs to contain multiple newlines, and git fully
allows such a case. The current code does not handle multiple newlines.
For symrefs, it allows spaces and newlines, for example:
ref: refs/heads/master <space>
ref: refs/heads/master \n\n\n
But for a case like the following, git will report an error:
ref: refs/heads/master garbage
A ref may or may not end with a newline; both forms are accepted. Junio
has told me that there is no real spec. So I ignore multiple
newlines for regular refs, and also ignore multiple newlines and trailing
spaces for symrefs.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 10/10] fsck: add ref content check for files backend
2024-07-29 13:27 ` [GSoC][PATCH v13 10/10] fsck: add ref content " shejialuo
2024-07-30 8:31 ` Patrick Steinhardt
@ 2024-07-30 22:06 ` Junio C Hamano
2024-07-31 16:19 ` shejialuo
1 sibling, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-07-30 22:06 UTC (permalink / raw)
To: shejialuo
Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine,
Justin Tobler
shejialuo <shejialuo@gmail.com> writes:
> +static int files_fsck_refs_content(struct fsck_options *o,
> + const char *gitdir,
> + const char *refs_check_dir,
> + struct dir_iterator *iter)
> +{
> + ...
> + if (parse_loose_ref_contents(ref_content.buf, &oid,
> + &referent, &type,
> + &failure_errno, &trailing)) {
The function parse_loose_ref_contents() needs to know what the hash
algorithm is, and it used to implicitly assume that the_repository's
hash algorithm was an OK thing to use. Patrick's recent clean-up
series instead passes "struct ref_store *refs" throughout the call
chain so that "ref->repo->hash_algo" can be used. This needs some
matching change, which means ...
> files_fsck_refs_fn fsck_refs_fns[]= {
> files_fsck_refs_name,
> + files_fsck_refs_content,
> NULL
> };
... the function signature for files_fsck_refs_fn must change to
have something that lets you access repo->hash_algo.
By the way, unless the most common use of an array is to pass it
around as a collection of items and operate on the collection, it is
a better practice to name an array with a singular noun. Name the
array as fsck_refs_fn[] not _fns[]. This is so that you can refer
to a single element in a more grammatical way. E.g. with
struct dog dog[] = { { .breed="shiba" }, { .breed="beagle" } };
you can say "dog[0] has brown fur" instead of "dogs[0] has ...".
In this case, you do not treat the collection of functions as a one
thing and do something to the collection. Instead you'd repeat over
the functions in a loop and individually call them, perhaps like so:
for (i = 0; fsck_fn[i] != NULL; i++)
fsck_fn[i](...);
so it is very much more appropriate to name the array itself as
singular to allow you to say "first fsck_fn", "next fsck_fn", etc.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 10/10] fsck: add ref content check for files backend
2024-07-30 22:06 ` Junio C Hamano
@ 2024-07-31 16:19 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-31 16:19 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, Patrick Steinhardt, Karthik Nayak, Eric Sunshine,
Justin Tobler
On Tue, Jul 30, 2024 at 03:06:37PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > +static int files_fsck_refs_content(struct fsck_options *o,
> > + const char *gitdir,
> > + const char *refs_check_dir,
> > + struct dir_iterator *iter)
> > +{
> > + ...
> > + if (parse_loose_ref_contents(ref_content.buf, &oid,
> > + &referent, &type,
> > + &failure_errno, &trailing)) {
>
> The function parse_loose_ref_contents() needs to know what the hash
> algorithm is, and it used to implicitly assume that the_repository's
> hash algorithm was an OK thing to use. Patrick's recent clean-up
> series instead passes "struct ref_store *refs" throughout the call
> chain so that "ref->repo->hash_algo" can be used. This needs some
> matching change, which means ...
>
> > files_fsck_refs_fn fsck_refs_fns[]= {
> > files_fsck_refs_name,
> > + files_fsck_refs_content,
> > NULL
> > };
>
> ... the function signature for files_fsck_refs_fn must change to
> have something that lets you access repo->hash_algo.
>
Thanks for the reminder. I have looked at this patch:
https://lore.kernel.org/git/fe0e2c3617c8040c632dbc3de613a1d22e8070f7.1722316795.git.ps@pks.im/
I guess I will handle this later. It seems that this series has not come
into the cooking tree yet. I will update this part once Patrick's patch
gets merged into "next".
>
> By the way, unless the most common use of an array is to pass it
> around as a collection of items and operate on the collection, it is
> a better practice to name an array with a singular noun. Name the
> array as fsck_refs_fn[] not _fns[]. This is so that you can refer
> to a single element in a more grammatical way. E.g. with
>
> struct dog dog[] = { { .breed="shiba" }, { .breed="beagle" } };
>
> you can say "dog[0] has brown fur" instead of "dogs[0] has ...".
>
> In this case, you do not treat the collection of functions as a one
> thing and do something to the collection. Instead you'd repeat over
> the functions in a loop and individually call them, perhaps like so:
>
> for (i = 0; fsck_fn[i] != NULL; i++)
> fsck_fn[i](...);
>
> so it is very much more appropriate to name the array itself as
> singular to allow you to say "first fsck_fn", "next fsck_fn", etc.
>
Thanks, I have learned a lot here. I will improve this in the next
version.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 00/10] ref consistency check infra setup
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (9 preceding siblings ...)
2024-07-29 13:27 ` [GSoC][PATCH v13 10/10] fsck: add ref content " shejialuo
@ 2024-07-30 8:31 ` Patrick Steinhardt
2024-07-30 16:29 ` shejialuo
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
11 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-07-30 8:31 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 2065 bytes --]
On Mon, Jul 29, 2024 at 09:22:52PM +0800, shejialuo wrote:
> Hi All:
>
> This version mainly focuses on the problem about how should we provide
> extensibility for reporting the problem of refs. In patch v10, Junio
> asked a question here:
>
> > The error reporting function for refs consistency check was still
> > about reporting a problem for a single ref. I am wondering how
> > consistency violations that are not about a single ref should be
> > handled. For example, if refs/packed-backend.c:packed_fsck() finds
> > that the file is not sorted properly or has some unparseable garbage
> > in it, it is not something you can report as "refs/heads/main is
> > broken", but those who are interested in seeing the "reference
> > database consistency" verified, it is very much what they want the
> > tool to notice. How would detection of such a breakage that is not
> > attributed to a single ref fit in this "ref consistency check
> > infrastructure" that was introduced by [05/10]?
>
> Actually, I think that the original parameter "checked_ref_name" is a
> bad name which makes the reader think that "we only handle refs". And
> this is my fault for this design. However, I misunderstood the Junio's
> word and made things complicated.
>
> The patch v11 and v12 wants to solve a problem that we should provide
> extensibility for reporting refs problem. However, these two versions
> still made things complicated. After an offline meeting with Patrick and
> Karthik, we design the following simple flat data structure:
>
> struct fsck_refs_info {
> const char *path;
> };
>
> It is simple and provides extensibility.
I've got a bunch of comments, mostly because I've been out for quite a
while and thus didn't follow the progression of this series. But don't
let yourself be discouraged by the volume, I quite like the shape of
this patch series and think that it is very close to what we actually
want to have.
I'd say that this series is likely to become ready soonish.
Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v13 00/10] ref consistency check infra setup
2024-07-30 8:31 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup Patrick Steinhardt
@ 2024-07-30 16:29 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-07-30 16:29 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
> I've got a bunch of comments, mostly because I've been out for quite a
> while and thus didn't follow the progression of this series. But don't
> let yourself be discouraged by the volume, I quite like the shape of
> this patch series and think that it is very close to what we actually
> want to have.
>
> I'd say that this series is likely to become ready soonish.
>
> Thanks!
>
> Patrick
That's OK, Patrick. It's important for me to receive reviews so that we can
make this series better. I will wait for Junio's advice
about the design of the "fsck_error" here. If Junio is satisfied with
this design, I will clean up the code and keep submitting new versions until
everything is OK.
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 00/11] ref consistency check infra setup
2024-07-29 13:22 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup shejialuo
` (10 preceding siblings ...)
2024-07-30 8:31 ` [GSoC][PATCH v13 00/10] ref consistency check infra setup Patrick Steinhardt
@ 2024-08-01 15:11 ` shejialuo
2024-08-01 15:13 ` [GSoC][PATCH v14 01/11] fsck: rename "skiplist" to "skip_oids" shejialuo
` (12 more replies)
11 siblings, 13 replies; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:11 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All,
This version does the following things:
1. Following Patrick's advice, make the callback
function generic by adding only one "void * fsck_report" parameter.
Thus the commit sequence is much clearer, and it will be much
easier for reviewers to review. I have also split the commit into more
commits in this version.
2. Enhance the commit messages to provide more context about why we
should do this.
3. Patrick advises that we should initialize the "fsck_options" members
when parsing the options. However, because the original "strict" and
"verbose" fields are defined as bit-fields, we cannot take their
addresses. So I simply remove the bit-fields.
4. As Patrick said, ".lock" files should not be reported as errors. For now,
ignore files ending with ".lock".
5. Add a fsck msg type called "badRefFiletype" which indicates that a
ref has a bad file type when scanning the directory.
6. Junio advises that instead of using "fsck_refs_fns", we should use the
singular version "fsck_refs_fn". Fix this.
7. Drop the last patch because this series mainly focuses on the
infra. I will send a later series to add the ref content check.
However, there is one thing still pending. Junio advises that I should
follow Patrick's change to the prototype of "files_fsck_refs_fn":
https://lore.kernel.org/git/fe0e2c3617c8040c632dbc3de613a1d22e8070f7.1722316795.git.ps@pks.im/
However, at present that patch is not in the cooking tree. I will
handle this later.
shejialuo (11):
fsck: rename "skiplist" to "skip_oids"
fsck: make "fsck_error" callback generic
fsck: add a unified interface for reporting fsck messages
fsck: add refs report function
fsck: add refs-related error callback
fsck: rename objects-related fsck error functions
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
builtin/fsck: add `git-refs verify` child process
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
Documentation/fsck-msgids.txt | 6 ++
Documentation/git-refs.txt | 13 ++++
builtin/fsck.c | 34 +++++++--
builtin/mktag.c | 3 +-
builtin/refs.c | 34 +++++++++
fsck.c | 131 +++++++++++++++++++++++++++-------
fsck.h | 76 +++++++++++++++-----
object-file.c | 9 ++-
refs.c | 5 ++
refs.h | 8 +++
refs/debug.c | 11 +++
refs/files-backend.c | 118 +++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 +++
refs/refs-internal.h | 6 ++
refs/reftable-backend.c | 8 +++
t/t0602-reffiles-fsck.sh | 94 ++++++++++++++++++++++++
16 files changed, 504 insertions(+), 60 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v13:
1: 772cad5b92 = 1: 75b64a219d fsck: rename "skiplist" to "skip_oids"
2: e76449977d < -: ---------- fsck: add a unified interface for reporting fsck messages
-: ---------- > 2: 81433d1628 fsck: make "fsck_error" callback generic
-: ---------- > 3: 0792c51e6d fsck: add a unified interface for reporting fsck messages
-: ---------- > 4: ecd144af15 fsck: add refs report function
-: ---------- > 5: e80dba0cab fsck: add refs-related error callback
3: 26bde4283f ! 6: a61db42bf2 fsck: rename objects-related fsck error functions
@@ Metadata
## Commit message ##
fsck: rename objects-related fsck error functions
- The names of objects-related fsck error functions are general. It's OK
+ The names of objects-related fsck error functions are generic. It's OK
when there is only object database check. However, we have introduced
refs database check report function. To avoid ambiguity, rename
object-related fsck error functions to explicitly indicate these
@@ builtin/fsck.c: static int objerror(struct object *obj, const char *err)
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
-- const struct object_id *oid,
-- enum object_type object_type,
-- const struct fsck_refs_info *refs_info UNUSED,
+- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
-+ const struct object_id *oid,
-+ enum object_type object_type,
-+ const struct fsck_refs_info *refs_info UNUSED,
++ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
- switch (msg_type) {
- case FSCK_WARN:
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
@@ builtin/fsck.c: int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
@@ fsck.c: int fsck_buffer(const struct object_id *oid, enum object_type type,
}
-int fsck_error_function(struct fsck_options *o,
-- const struct object_id *oid,
-- enum object_type object_type UNUSED,
-- const struct fsck_refs_info *refs_info UNUSED,
+- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
-+ const struct object_id *oid,
-+ enum object_type object_type UNUSED,
-+ const struct fsck_refs_info *refs_info UNUSED,
++ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
- if (msg_type == FSCK_WARN) {
- warning("object %s: %s", fsck_describe_object(o, oid), message);
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
@@ fsck.c: int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
-- const struct object_id *oid,
-- enum object_type object_type,
-- const struct fsck_refs_info *refs_info,
+- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
-+ const struct object_id *oid,
-+ enum object_type object_type,
-+ const struct fsck_refs_info *refs_info,
++ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
- puts(oid_to_hex(oid));
+ struct fsck_object_report *report = fsck_report;
+ puts(oid_to_hex(report->oid));
return 0;
}
-- return fsck_error_function(o, oid, object_type, refs_info,
-- msg_type, msg_id, message);
-+ return fsck_objects_error_function(o, oid, object_type, refs_info,
-+ msg_type, msg_id, message);
+- return fsck_error_function(o, fsck_report, msg_type, msg_id, message);
++ return fsck_objects_error_function(o, fsck_report, msg_type,msg_id, message);
}
## fsck.h ##
@@ fsck.h: typedef int (*fsck_error)(struct fsck_options *o,
const char *message);
-int fsck_error_function(struct fsck_options *o,
-- const struct object_id *oid, enum object_type object_type,
-- const struct fsck_refs_info *refs_info,
+- void *fsck_report,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
-- const struct object_id *oid,
-- enum object_type object_type,
-- const struct fsck_refs_info *refs_info,
+- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
-+ const struct object_id *oid, enum object_type object_type,
-+ const struct fsck_refs_info *refs_info,
++ void *fsck_report,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
-+ const struct object_id *oid,
-+ enum object_type object_type,
-+ const struct fsck_refs_info *refs_info,
++ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
- /*
- * The information for reporting refs-related error message
+ int fsck_refs_error_function(struct fsck_options *options,
+ void *fsck_report,
@@ fsck.h: struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
@@ fsck.h: struct fsck_options {
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
-
- /* descend in all linked child objects
+ #define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
4: 56fc833a54 < -: ---------- fsck: add refs-related error report function
5: 90f992ecfb ! 7: f2248bc52d refs: set up ref consistency check infrastructure
@@ Metadata
## Commit message ##
refs: set up ref consistency check infrastructure
+ The "struct ref_store" is the base class which contains the "be" pointer
+ which provides backend-specific functions whose interfaces are defined
+ in the "ref_storage_be". We could reuse this polymorphism to define only
+ one interface. For every backend, we need to provide its own function
+ pointer.
+
The interfaces defined in the `ref_storage_be` are carefully structured
in semantic. It's organized as the five parts:
6: b2277f5ef4 ! 8: 0c5463d757 git refs: add verify subcommand
@@ Metadata
Author: shejialuo <shejialuo@gmail.com>
## Commit message ##
- git refs: add verify subcommand
+ builtin/refs: add verify subcommand
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency and also this subcommand will
- be used as the entry point of checking refs for "git-fsck(1)". Last, add
- "verbose" field into "fsck_options" to indicate whether we should print
- verbose messages when checking refs and objects consistency.
+ be used as the entry point of checking refs for "git-fsck(1)".
+
+ Add "verbose" field into "fsck_options" to indicate whether we should
+ print verbose messages when checking refs and objects consistency.
+
+ Remove bit-field for "strict" field, this is because we cannot take
+ address of a bit-field which makes it unhandy to set member variables
+ when parsing the command line options.
+
+ The "git-fsck(1)" declares "fsck_options" variable with "static"
+ identifier which avoids complaint by the leak-checker. However, in
+ "git-refs verify", we need to do memory clean manually. Thus add
+ "fsck_options_clear" function in "fsck.c" to provide memory clean
+ operation.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
@@ Documentation/git-refs.txt: include::ref-storage-format.txt[]
+The following options are specific to 'git refs verify':
+
+--strict::
-+ Enable more strict checking, every WARN severity for the `Fsck Messages`
-+ be seen as ERROR. See linkgit:git-fsck[1].
++ Enable stricter error checking. This will cause warnings to be
++ reported as errors. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
-+ unsigned int verbose = 0, strict = 0;
+ struct option options[] = {
-+ OPT__VERBOSE(&verbose, N_("be verbose")),
-+ OPT_BOOL(0, "strict", &strict, N_("enable strict checking")),
++ OPT_BOOL(0, "verbose", &fsck_refs_options.verbose, N_("be verbose")),
++ OPT_BOOL(0, "strict", &fsck_refs_options.strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
-+ if (verbose)
-+ fsck_refs_options.verbose = 1;
-+ if (strict)
-+ fsck_refs_options.strict = 1;
-+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
-+ /*
-+ * Explicitly free the allocated array and "skip_oids" set
-+ */
-+ free(fsck_refs_options.msg_type);
-+ oidset_clear(&fsck_refs_options.skip_oids);
++ fsck_options_clear(&fsck_refs_options);
+ return ret;
+}
+
@@ builtin/refs.c: static int cmd_refs_migrate(int argc, const char **argv, const c
};
+ ## fsck.c ##
+@@ fsck.c: int fsck_finish(struct fsck_options *options)
+ return ret;
+ }
+
++void fsck_options_clear(struct fsck_options *options)
++{
++ free(options->msg_type);
++ oidset_clear(&options->skip_oids);
++ oidset_clear(&options->gitmodules_found);
++ oidset_clear(&options->gitmodules_done);
++ oidset_clear(&options->gitattributes_found);
++ oidset_clear(&options->gitattributes_done);
++ kh_clear_oid_map(options->object_names);
++}
++
+ int git_fsck_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
+ {
+
## fsck.h ##
-@@ fsck.h: struct fsck_options {
+@@ fsck.h: struct fsck_ref_report {
+ struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
- unsigned strict:1;
-+ unsigned verbose:1;
+- unsigned strict:1;
++ unsigned strict;
++ unsigned verbose;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
+@@ fsck.h: int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
+ */
+ int fsck_finish(struct fsck_options *options);
+
++/*
++ * Clear the fsck_options struct, freeing any allocated memory.
++ */
++void fsck_options_clear(struct fsck_options *options);
++
+ /*
+ * Report an error or warning for refs.
+ */
7: f96d0f200d = 9: 0ac25c2f6e builtin/fsck: add `git-refs verify` child process
8: f9a0c16bef ! 10: 2b6f5e6c46 files-backend: add unified interface for refs scanning
@@ Commit message
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
+ ## Documentation/fsck-msgids.txt ##
+@@
+ `badParentSha1`::
+ (ERROR) A commit object has a bad parent sha1.
+
++`badRefFiletype`::
++ (ERROR) A ref has a bad file type.
++
+ `badTagName`::
+ (INFO) A tag has an invalid format.
+
+
+ ## fsck.h ##
+@@ fsck.h: enum fsck_msg_type {
+ FUNC(BAD_NAME, ERROR) \
+ FUNC(BAD_OBJECT_SHA1, ERROR) \
+ FUNC(BAD_PARENT_SHA1, ERROR) \
++ FUNC(BAD_REF_FILETYPE, ERROR) \
+ FUNC(BAD_TIMEZONE, ERROR) \
+ FUNC(BAD_TREE, ERROR) \
+ FUNC(BAD_TREE_SHA1, ERROR) \
+
## refs/files-backend.c ##
@@
#include "../gettext.h"
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
-+ files_fsck_refs_fn *fsck_refs_fns)
++ files_fsck_refs_fn *fsck_refs_fn)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
-+
+ if (!iter) {
-+ ret = error_errno("cannot open directory %s", sb.buf);
++ ret = error_errno(_("cannot open directory %s"), sb.buf);
+ goto out;
+ }
+
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
-+ for (size_t i = 0; fsck_refs_fns[i]; i++) {
-+ if (fsck_refs_fns[i](o, gitdir, refs_check_dir, iter))
++ for (size_t i = 0; fsck_refs_fn[i]; i++) {
++ if (fsck_refs_fn[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
-+ ret = error(_("unexpected file type for '%s'"),
-+ iter->basename);
++ struct fsck_ref_report report = { .path = iter->basename };
++ if (fsck_report_ref(o, &report,
++ FSCK_MSG_BAD_REF_FILETYPE,
++ "unexpected file type"))
++ ret = -1;
+ }
+ }
+
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
-+ files_fsck_refs_fn fsck_refs_fns[]= {
-+ NULL
++ files_fsck_refs_fn fsck_refs_fn[]= {
++ NULL,
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
-+
-+ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fns);
++ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
+
+}
+
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
files_downcast(ref_store, REF_STORE_READ, "fsck");
- return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
-+ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o) |
-+ files_fsck_refs(ref_store, o);
++ return files_fsck_refs(ref_store, o) |
++ refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
}
struct ref_storage_be refs_be_files = {
9: ee55ee5787 < -: ---------- fsck: add ref name check for files backend
10: 9256328cbb < -: ---------- fsck: add ref content check for files backend
-: ---------- > 11: 14f2739bd7 fsck: add ref name check for files backend
--
2.45.2
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 01/11] fsck: rename "skiplist" to "skip_oids"
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
@ 2024-08-01 15:13 ` shejialuo
2024-08-01 15:13 ` [GSoC][PATCH v14 02/11] fsck: make "fsck_error" callback generic shejialuo
` (11 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:13 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency checks, the "skiplist" name is too
general, which will make callers think "skiplist" is related to both
refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 02/11] fsck: make "fsck_error" callback generic
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
2024-08-01 15:13 ` [GSoC][PATCH v14 01/11] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-08-01 15:13 ` shejialuo
2024-08-01 15:13 ` [GSoC][PATCH v14 03/11] fsck: add a unified interface for reporting fsck messages shejialuo
` (10 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:13 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "fsck_error" callback is designed to report objects-related
error messages. It accepts two parameters, "oid" and "object_type", which
are not generic. In order to provide a unified callback which can report
on either objects or refs, remove the objects-related parameters and add
the generic parameter "void *fsck_report".
Create a new "fsck_object_report" structure which incorporates the
removed parameters "oid" and "object_type". Then change the
corresponding references to adapt to the new "fsck_error" callback.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 7 +++++--
builtin/mktag.c | 3 +--
fsck.c | 20 +++++++++++++-------
fsck.h | 17 ++++++++++++-----
object-file.c | 9 ++++-----
5 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..9673a08286 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -90,12 +90,15 @@ static int objerror(struct object *obj, const char *err)
}
static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
+ enum object_type object_type = report->object_type;
+
switch (msg_type) {
case FSCK_WARN:
/* TRANSLATORS: e.g. warning in tree 01bfda: <more explanation> */
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..c6b644219f 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -18,8 +18,7 @@ static int option_strict = 1;
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
+ void *fsck_report UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 3f32441492..4c1f8bc44a 100644
--- a/fsck.c
+++ b/fsck.c
@@ -232,6 +232,10 @@ static int report(struct fsck_options *options,
enum fsck_msg_id msg_id, const char *fmt, ...)
{
va_list ap;
+ struct fsck_object_report report = {
+ .oid = oid,
+ .object_type = object_type
+ };
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -252,7 +256,7 @@ static int report(struct fsck_options *options,
va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ result = options->error_func(options, &report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -1201,12 +1205,14 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
}
int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
+
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
return 0;
@@ -1304,15 +1310,15 @@ int git_fsck_config(const char *var, const char *value,
*/
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
- puts(oid_to_hex(oid));
+ struct fsck_object_report *report = fsck_report;
+ puts(oid_to_hex(report->oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_error_function(o, fsck_report, msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..303174a5d8 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,23 +114,30 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * Callback for reporting errors either for objects or refs. The "fsck_report"
+ * is a generic pointer that can be used to pass any information.
+ */
typedef int (*fsck_error)(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+struct fsck_object_report {
+ const struct object_id *oid;
+ enum object_type object_type;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
diff --git a/object-file.c b/object-file.c
index 065103be3e..05ac6ebed6 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,10 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ void *fsck_report UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 03/11] fsck: add a unified interface for reporting fsck messages
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
2024-08-01 15:13 ` [GSoC][PATCH v14 01/11] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-08-01 15:13 ` [GSoC][PATCH v14 02/11] fsck: make "fsck_error" callback generic shejialuo
@ 2024-08-01 15:13 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-01 15:14 ` [GSoC][PATCH v14 04/11] fsck: add refs report function shejialuo
` (9 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:13 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking error
type and calling the callback "error_func" to report the message. Both
refs and objects need to check the error type of the current fsck
message. In order to extract this common behavior, create a new function
"fsck_vreport". Instead of using "...", provide "va_list" to allow more
flexibility.
Instead of changing the "report" prototype to align with the
"fsck_vreport" function, we leave the "report" prototype unchanged
because there are nearly 62 references to the "report"
function. Simply change the "report" function to use "fsck_vreport" to
report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 49 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 34 insertions(+), 15 deletions(-)
diff --git a/fsck.c b/fsck.c
index 4c1f8bc44a..b394a9e397 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,16 +226,16 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide the common functionality for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+ void *fsck_report,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
- struct fsck_object_report report = {
- .oid = oid,
- .object_type = object_type
- };
+ va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -243,9 +243,6 @@ static int report(struct fsck_options *options,
if (msg_type == FSCK_IGNORE)
return 0;
- if (object_on_skiplist(options, oid))
- return 0;
-
if (msg_type == FSCK_FATAL)
msg_type = FSCK_ERROR;
else if (msg_type == FSCK_INFO)
@@ -254,9 +251,9 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
- strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, &report,
+ va_copy(ap_copy, ap);
+ strbuf_vaddf(&sb, fmt, ap_copy);
+ result = options->error_func(options, fsck_report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -264,6 +261,28 @@ static int report(struct fsck_options *options,
return result;
}
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ struct fsck_object_report report = {
+ .oid = oid,
+ .object_type = object_type
+ };
+ int result;
+
+ if (object_on_skiplist(options, oid))
+ return 0;
+
+ va_start(ap, fmt);
+ result = fsck_vreport(options, &report, msg_id, fmt, ap);
+ va_end(ap);
+
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 03/11] fsck: add a unified interface for reporting fsck messages
2024-08-01 15:13 ` [GSoC][PATCH v14 03/11] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
2024-08-05 15:10 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 757 bytes --]
On Thu, Aug 01, 2024 at 11:13:59PM +0800, shejialuo wrote:
> @@ -254,9 +251,9 @@ static int report(struct fsck_options *options,
> prepare_msg_ids();
> strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
>
> - va_start(ap, fmt);
> - strbuf_vaddf(&sb, fmt, ap);
> - result = options->error_func(options, &report,
> + va_copy(ap_copy, ap);
Can't we use `ap` directly instead of copying it? We'd have to get rid
of the call to `va_end` as our caller already does that, but other than
that I don't see any reason to copy the argument list here.
> + strbuf_vaddf(&sb, fmt, ap_copy);
> + result = options->error_func(options, fsck_report,
> msg_type, msg_id, sb.buf);
> strbuf_release(&sb);
> va_end(ap);
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 03/11] fsck: add a unified interface for reporting fsck messages
2024-08-05 12:58 ` Patrick Steinhardt
@ 2024-08-05 15:10 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-05 15:10 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
On Mon, Aug 05, 2024 at 02:58:06PM +0200, Patrick Steinhardt wrote:
> On Thu, Aug 01, 2024 at 11:13:59PM +0800, shejialuo wrote:
> > @@ -254,9 +251,9 @@ static int report(struct fsck_options *options,
> > prepare_msg_ids();
> > strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
> >
> > - va_start(ap, fmt);
> > - strbuf_vaddf(&sb, fmt, ap);
> > - result = options->error_func(options, &report,
> > + va_copy(ap_copy, ap);
>
> Can't we use `ap` directly instead of copying it? We'd have to get rid
> of the call to `va_end` as our caller already does that, but other than
> that I don't see any reason to copy the argument list here.
>
Thanks, Patrick! This is right. I will fix this in the next version.
> > + strbuf_vaddf(&sb, fmt, ap_copy);
> > + result = options->error_func(options, fsck_report,
> > msg_type, msg_id, sb.buf);
> > strbuf_release(&sb);
> > va_end(ap);
>
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 04/11] fsck: add refs report function
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (2 preceding siblings ...)
2024-08-01 15:13 ` [GSoC][PATCH v14 03/11] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-08-01 15:14 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-01 15:14 ` [GSoC][PATCH v14 05/11] fsck: add refs-related error callback shejialuo
` (8 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:14 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new struct "fsck_ref_report" to contain the information we
need when reporting refs-related messages.
With the new "fsck_vreport" function, add a new function
"fsck_report_ref" to report refs-related fsck error messages. Unlike the
"report" function, which spells out the exact parameters it needs, we
simply pass "struct fsck_ref_report *report" as the parameter. This is
because at present we don't know exactly how many fields we need. By
passing this struct, we don't need to change the function prototype when
we want to add more information to "fsck_ref_report".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 13 +++++++++++++
fsck.h | 16 ++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/fsck.c b/fsck.c
index b394a9e397..ee888c1417 100644
--- a/fsck.c
+++ b/fsck.c
@@ -283,6 +283,19 @@ static int report(struct fsck_options *options,
return result;
}
+int fsck_report_ref(struct fsck_options *options,
+ struct fsck_ref_report *report,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_vreport(options, report, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
diff --git a/fsck.h b/fsck.h
index 303174a5d8..0918c28d0f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -138,6 +138,12 @@ struct fsck_object_report {
enum object_type object_type;
};
+struct fsck_ref_report {
+ const char *path;
+ const struct object_id *oid;
+ const char *referent;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
@@ -216,6 +222,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 4, 5)))
+int fsck_report_ref(struct fsck_options *options,
+ struct fsck_ref_report *report,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
+
/*
* Subsystem for storing human-readable names for each object.
*
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 04/11] fsck: add refs report function
2024-08-01 15:14 ` [GSoC][PATCH v14 04/11] fsck: add refs report function shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 578 bytes --]
On Thu, Aug 01, 2024 at 11:14:10PM +0800, shejialuo wrote:
> @@ -216,6 +222,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
> */
> int fsck_finish(struct fsck_options *options);
>
> +/*
> + * Report an error or warning for refs.
> + */
> +__attribute__((format (printf, 4, 5)))
> +int fsck_report_ref(struct fsck_options *options,
> + struct fsck_ref_report *report,
> + enum fsck_msg_id msg_id,
> + const char *fmt, ...);
Nice that we got rid of the parts that don't matter for reporting refs
now.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 05/11] fsck: add refs-related error callback
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (3 preceding siblings ...)
2024-08-01 15:14 ` [GSoC][PATCH v14 04/11] fsck: add refs report function shejialuo
@ 2024-08-01 15:14 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-01 15:14 ` [GSoC][PATCH v14 06/11] fsck: rename objects-related fsck error functions shejialuo
` (7 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:14 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
We have introduced "fsck_report_ref" function to report the error
message for refs. We still need to add the corresponding callback
function. Create refs-specific "error_func" callback
"fsck_refs_error_function".
Last, add "FSCK_REFS_OPTIONS_DEFAULT" macro to create default options
when checking ref consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 28 ++++++++++++++++++++++++++++
fsck.h | 9 +++++++++
2 files changed, 37 insertions(+)
diff --git a/fsck.c b/fsck.c
index ee888c1417..f0ae760c86 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1253,6 +1253,34 @@ int fsck_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct fsck_ref_report *report = fsck_report;
+
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ strbuf_addstr(&sb, report->path);
+
+ if (report->oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid));
+ else if (report->referent)
+ strbuf_addf(&sb, " -> (%s)", report->referent);
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 0918c28d0f..2276ea7e34 100644
--- a/fsck.h
+++ b/fsck.h
@@ -133,6 +133,12 @@ int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
+
struct fsck_object_report {
const struct object_id *oid;
enum object_type object_type;
@@ -181,6 +187,9 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 05/11] fsck: add refs-related error callback
2024-08-01 15:14 ` [GSoC][PATCH v14 05/11] fsck: add refs-related error callback shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 679 bytes --]
On Thu, Aug 01, 2024 at 11:14:19PM +0800, shejialuo wrote:
> We have introduced "fsck_report_ref" function to report the error
> message for refs. We still need to add the corresponding callback
> function. Create refs-specific "error_func" callback
> "fsck_refs_error_function".
>
> Last, add "FSCK_REFS_OPTIONS_DEFAULT" macro to create default options
> when checking ref consistency.
Nit: personally, I'd squash this commit into the preceding one. They
belong together logically, as the preceding step doesn't make any sense
without the error reporting function and this step here doesn't make any
sense without introducing `struct fsck_ref_report`.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 06/11] fsck: rename objects-related fsck error functions
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (4 preceding siblings ...)
2024-08-01 15:14 ` [GSoC][PATCH v14 05/11] fsck: add refs-related error callback shejialuo
@ 2024-08-01 15:14 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-01 15:14 ` [GSoC][PATCH v14 07/11] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:14 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of objects-related fsck error functions are generic. It's OK
when there is only object database check. However, we have introduced
refs database check report function. To avoid ambiguity, rename
object-related fsck error functions to explicitly indicate these
functions are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 12 ++++++------
fsck.c | 22 +++++++++++-----------
fsck.h | 24 ++++++++++++------------
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 9673a08286..766bbd014d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,11 +89,11 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
struct fsck_object_report *report = fsck_report;
const struct object_id *oid = report->oid;
@@ -941,7 +941,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index f0ae760c86..0a870fcec0 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1236,11 +1236,11 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+int fsck_objects_error_function(struct fsck_options *o,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
struct fsck_object_report *report = fsck_report;
const struct object_id *oid = report->oid;
@@ -1369,16 +1369,16 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
struct fsck_object_report *report = fsck_report;
puts(oid_to_hex(report->oid));
return 0;
}
- return fsck_error_function(o, fsck_report, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, fsck_report, msg_type,msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 2276ea7e34..2002590f60 100644
--- a/fsck.h
+++ b/fsck.h
@@ -123,15 +123,15 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- void *fsck_report,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- void *fsck_report,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ void *fsck_report,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
int fsck_refs_error_function(struct fsck_options *options,
void *fsck_report,
@@ -169,7 +169,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -177,7 +177,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -185,7 +185,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
#define FSCK_REFS_OPTIONS_DEFAULT { \
.error_func = fsck_refs_error_function, \
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 06/11] fsck: rename objects-related fsck error functions
2024-08-01 15:14 ` [GSoC][PATCH v14 06/11] fsck: rename objects-related fsck error functions shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 541 bytes --]
On Thu, Aug 01, 2024 at 11:14:29PM +0800, shejialuo wrote:
> The names of objects-related fsck error functions are generic. It's OK
> when there is only object database check. However, we have introduced
> refs database check report function. To avoid ambiguity, rename
> object-related fsck error functions to explicitly indicate these
> functions are used to report objects-related messages.
Nit: I think it would be a bit easier to follow if you moved this step
a bit earlier, namely before you introduce the ref-related infra.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 07/11] refs: set up ref consistency check infrastructure
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (5 preceding siblings ...)
2024-08-01 15:14 ` [GSoC][PATCH v14 06/11] fsck: rename objects-related fsck error functions shejialuo
@ 2024-08-01 15:14 ` shejialuo
2024-08-01 15:14 ` [GSoC][PATCH v14 08/11] builtin/refs: add verify subcommand shejialuo
` (5 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:14 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "struct ref_store" is the base class which contains the "be" pointer
which provides backend-specific functions whose interfaces are defined
in the "ref_storage_be". We could reuse this polymorphism to define only
one interface. For every backend, we need to provide its own function
pointer.
The interfaces defined in the `ref_storage_be` are carefully grouped by
their semantics into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named "fsck_fn"
to the end of "ref_storage_be". It does not fit into any of the five
categories above, so explicitly add a blank line to set it apart from
the others.
Last, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 13 ++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index 915aeb4d1d..6f642dc681 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index b3e39bc257..405073621a 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..4630eb1f80 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,15 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3443,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 08/11] builtin/refs: add verify subcommand
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (6 preceding siblings ...)
2024-08-01 15:14 ` [GSoC][PATCH v14 07/11] refs: set up ref consistency check infrastructure shejialuo
@ 2024-08-01 15:14 ` shejialuo
2024-08-01 15:15 ` [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process shejialuo
` (4 subsequent siblings)
12 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:14 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency and also this subcommand will
be used as the entry point of checking refs for "git-fsck(1)".
Add "verbose" field into "fsck_options" to indicate whether we should
print verbose messages when checking refs and objects consistency.
Turn the "strict" field from a bit-field into a plain field, because we
cannot take the address of a bit-field, which makes it awkward to set
the member when parsing the command line options.
git-fsck(1) declares its "fsck_options" variables as "static", which
keeps the leak checker from complaining. However, in "git refs verify"
we need to clean up the memory manually. Thus add the
"fsck_options_clear" function in "fsck.c" to release the memory held by
the options.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++++
builtin/refs.c | 34 ++++++++++++++++++++++++++++++++++
fsck.c | 11 +++++++++++
fsck.h | 8 +++++++-
4 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..ce31f93061 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter error checking. This will cause warnings to be
+ reported as errors. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..131f98be98 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,44 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ struct option options[] = {
+ OPT_BOOL(0, "verbose", &fsck_refs_options.verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &fsck_refs_options.strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ fsck_options_clear(&fsck_refs_options);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.c b/fsck.c
index 0a870fcec0..d5e7c88eab 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1336,6 +1336,17 @@ int fsck_finish(struct fsck_options *options)
return ret;
}
+void fsck_options_clear(struct fsck_options *options)
+{
+ free(options->msg_type);
+ oidset_clear(&options->skip_oids);
+ oidset_clear(&options->gitmodules_found);
+ oidset_clear(&options->gitmodules_done);
+ oidset_clear(&options->gitattributes_found);
+ oidset_clear(&options->gitattributes_done);
+ kh_clear_oid_map(options->object_names);
+}
+
int git_fsck_config(const char *var, const char *value,
const struct config_context *ctx, void *cb)
{
diff --git a/fsck.h b/fsck.h
index 2002590f60..d551a9fe86 100644
--- a/fsck.h
+++ b/fsck.h
@@ -153,7 +153,8 @@ struct fsck_ref_report {
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
- unsigned strict:1;
+ unsigned strict;
+ unsigned verbose;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
@@ -231,6 +232,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Clear the fsck_options struct, freeing any allocated memory.
+ */
+void fsck_options_clear(struct fsck_options *options);
+
/*
* Report an error or warning for refs.
*/
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (7 preceding siblings ...)
2024-08-01 15:14 ` [GSoC][PATCH v14 08/11] builtin/refs: add verify subcommand shejialuo
@ 2024-08-01 15:15 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-01 15:15 ` [GSoC][PATCH v14 10/11] files-backend: add unified interface for refs scanning shejialuo
` (3 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:15 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new function "fsck_refs" that initializes and runs a child
process to execute the "git refs verify" command.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 766bbd014d..b6ac878270 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -899,6 +899,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
+static void fsck_refs(void)
+{
+ struct child_process refs_verify = CHILD_PROCESS_INIT;
+ child_process_init(&refs_verify);
+ refs_verify.git_cmd = 1;
+ strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
+ if (verbose)
+ strvec_push(&refs_verify.args, "--verbose");
+ if (check_strict)
+ strvec_push(&refs_verify.args, "--strict");
+
+ if (run_command(&refs_verify))
+ errors_found |= ERROR_REFS;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1068,6 +1083,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
+ fsck_refs();
+
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-01 15:15 ` [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
2024-08-05 15:18 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 2137 bytes --]
On Thu, Aug 01, 2024 at 11:15:00PM +0800, shejialuo wrote:
> Introduce a new function "fsck_refs" that initializes and runs a child
> process to execute the "git-refs verify" command.
It's `git refs verify`, not `git-refs verify` both in the commit body
and subject.
> Mentored-by: Patrick Steinhardt <ps@pks.im>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Signed-off-by: shejialuo <shejialuo@gmail.com>
> ---
> builtin/fsck.c | 17 +++++++++++++++++
> 1 file changed, 17 insertions(+)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index 766bbd014d..b6ac878270 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -899,6 +899,21 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
> return res;
> }
>
> +static void fsck_refs(void)
> +{
> + struct child_process refs_verify = CHILD_PROCESS_INIT;
> + child_process_init(&refs_verify);
> + refs_verify.git_cmd = 1;
> + strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
> + if (verbose)
> + strvec_push(&refs_verify.args, "--verbose");
> + if (check_strict)
> + strvec_push(&refs_verify.args, "--strict");
> +
> + if (run_command(&refs_verify))
> + errors_found |= ERROR_REFS;
> +}
Okay. I think that it's sensible to execute this as part of git-fsck(1).
But do we want to provide an option to disable this new check, as well?
It does feel a bit like opening a can of worms, though. None of the
other checks have trivial ways to disable them, and git-fsck(1) is
gaining more and more checks. So if we can disable ref checks, we may
also want to have options to disable checks for objects, connectivity,
reverse indices, indices, commit graphs and whatnot. In other words, in
my opinion we need to think a bit bigger and design a proper UI around
this.
But I don't think that should happen as part of this commit series, as
it is already big enough. So either we just accept this patch as-is. Or
we evict it from this series and handle it in the future together with
all the other tasks that one may potentially want to disable.
I'd rather pick option two.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-05 12:58 ` Patrick Steinhardt
@ 2024-08-05 15:18 ` shejialuo
2024-08-05 18:11 ` Junio C Hamano
0 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 15:18 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
> > +static void fsck_refs(void)
> > +{
> > + struct child_process refs_verify = CHILD_PROCESS_INIT;
> > + child_process_init(&refs_verify);
> > + refs_verify.git_cmd = 1;
> > + strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
> > + if (verbose)
> > + strvec_push(&refs_verify.args, "--verbose");
> > + if (check_strict)
> > + strvec_push(&refs_verify.args, "--strict");
> > +
> > + if (run_command(&refs_verify))
> > + errors_found |= ERROR_REFS;
> > +}
>
> Okay. I think that it's sensible to execute this as part of git-fsck(1).
> But do we want to provide an option to disable this new check, as well?
>
> It does feel a bit like opening a can of worms, though. None of the
> other checks have trivial ways to disable them, and git-fsck(1) is
> gaining more and more checks. So if we can disable ref checks, we may
> also want to have options to disable checks for objects, connectivity,
> reverse indices, indices, commit graphs and whatnot. In other words, in
> my opinion we need to think a bit bigger and design a proper UI around
> this.
>
> But I don't think that should happen as part of this commit series, as
> it is already big enough. So either we just accept this patch as-is. Or
> we evict it from this series and handle it in the future together with
> all the other taks that one may potentially want to disable.
>
> I'd rather pick option two.
>
After talking with Patrick offline, we decided to drop this patch. For
now, we should roll this change out slowly for users. Many people use
"git-fsck(1)", and we currently don't have a way to disable ref checks
by default. That is a little beyond the scope of this series.
We may consider it later.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-05 15:18 ` shejialuo
@ 2024-08-05 18:11 ` Junio C Hamano
2024-08-05 18:36 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-08-05 18:11 UTC (permalink / raw)
To: shejialuo
Cc: Patrick Steinhardt, git, Karthik Nayak, Eric Sunshine,
Justin Tobler
shejialuo <shejialuo@gmail.com> writes:
>> > +static void fsck_refs(void)
>> > +{
>> > + struct child_process refs_verify = CHILD_PROCESS_INIT;
>> > + child_process_init(&refs_verify);
>> > + refs_verify.git_cmd = 1;
>> > + strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
>> > + if (verbose)
>> > + strvec_push(&refs_verify.args, "--verbose");
>> > + if (check_strict)
>> > + strvec_push(&refs_verify.args, "--strict");
>> > +
>> > + if (run_command(&refs_verify))
>> > + errors_found |= ERROR_REFS;
>> > +}
>>
>> Okay. I think that it's sensible to execute this as part of git-fsck(1).
>> But do we want to provide an option to disable this new check, as well?
>>
>> It does feel a bit like opening a can of worms, though. None of the
>> other checks have trivial ways to disable them, and git-fsck(1) is
>> gaining more and more checks. So if we can disable ref checks, we may
>> also want to have options to disable checks for objects, connectivity,
>> reverse indices, indices, commit graphs and whatnot. In other words, in
>> my opinion we need to think a bit bigger and design a proper UI around
>> this.
>>
>> But I don't think that should happen as part of this commit series, as
>> it is already big enough. So either we just accept this patch as-is. Or
>> we evict it from this series and handle it in the future together with
>> all the other taks that one may potentially want to disable.
>>
>> I'd rather pick option two.
>>
>
> After talking with Patrick offline, we decide to drop this patch. At
> current, we should put this change slowly for the user. Because many
> people use "git-fsck(1)", currently we don't have a way to disable ref
> checks by default. It's a little beyond this series.
>
> We may consider later.
Hmph, I am fine with the approach to take it slower.
BUT.
Here is what the diffstat for the whole thing in the updated round
v15 looked like. Are most of these changes outside refs/ still
needed if we do not give any way to even optionally enable it via
"fsck" for those who feel adventurous? Should we still be touching
fsck.[ch] and other fsck related infrastructure in the series?
Documentation/fsck-msgids.txt | 6 ++
Documentation/git-refs.txt | 13 +++++
builtin/fsck.c | 17 +++---
builtin/mktag.c | 3 +-
builtin/refs.c | 34 +++++++++++
fsck.c | 127 +++++++++++++++++++++++++++++++++---------
fsck.h | 76 +++++++++++++++++++------
object-file.c | 9 ++-
refs.c | 5 ++
refs.h | 8 +++
refs/debug.c | 11 ++++
refs/files-backend.c | 116 +++++++++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 +++
refs/refs-internal.h | 6 ++
refs/reftable-backend.c | 8 +++
t/t0602-reffiles-fsck.sh | 92 ++++++++++++++++++++++++++++++
16 files changed, 480 insertions(+), 59 deletions(-)
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-05 18:11 ` Junio C Hamano
@ 2024-08-05 18:36 ` shejialuo
2024-08-05 19:38 ` Junio C Hamano
0 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 18:36 UTC (permalink / raw)
To: Junio C Hamano
Cc: Patrick Steinhardt, git, Karthik Nayak, Eric Sunshine,
Justin Tobler
>
> Hmph, I am fine with the approach to take it slower.
>
> BUT.
>
> Here is what the diffstat for the whole thing in the updated round
> v15 looked like. Do most of these changes outside refs/ still
> needed if we do not give any way to even optionally enable it via
> "fsck" for those who feel adventurous? Should we still be touching
> fsck.[ch] and other fsck related infrastructure in the series?
>
From my current standing, regardless of whether we run the "git refs
verify" subprocess inside "git-fsck(1)", we must change the fsck
infrastructure illustrated by the following:
From the development of this series, we can know the main problem is
that fsck error message is highly coupled with the object checks. Even
if we don't perform "git refs verify" in "git-fsck(1)", we still need to
follow the fsck msg framework when executing "git refs verify". We
cannot avoid this.
The content we change for "fsck.[ch]" is mainly the fsck msg part. We do
not change anything about the objects.
I agree with you that it would be strange if we do not expose any
interfaces for users who are adventurous. Actually we may simply add an
option "--refs-experimental" or simply "--refs" to allow users to check
ref consistency by using "git-fsck(1)".
I guess the concern that Patrick cares about is that we ONLY make refs
optional here, but do not provide options for other checks. It will be
strange from this perspective.
> Documentation/fsck-msgids.txt | 6 ++
> Documentation/git-refs.txt | 13 +++++
> builtin/fsck.c | 17 +++---
> builtin/mktag.c | 3 +-
> builtin/refs.c | 34 +++++++++++
> fsck.c | 127 +++++++++++++++++++++++++++++++++---------
> fsck.h | 76 +++++++++++++++++++------
> object-file.c | 9 ++-
> refs.c | 5 ++
> refs.h | 8 +++
> refs/debug.c | 11 ++++
> refs/files-backend.c | 116 +++++++++++++++++++++++++++++++++++++-
> refs/packed-backend.c | 8 +++
> refs/refs-internal.h | 6 ++
> refs/reftable-backend.c | 8 +++
> t/t0602-reffiles-fsck.sh | 92 ++++++++++++++++++++++++++++++
> 16 files changed, 480 insertions(+), 59 deletions(-)
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-05 18:36 ` shejialuo
@ 2024-08-05 19:38 ` Junio C Hamano
2024-08-06 0:42 ` shejialuo
2024-08-06 5:29 ` Patrick Steinhardt
0 siblings, 2 replies; 282+ messages in thread
From: Junio C Hamano @ 2024-08-05 19:38 UTC (permalink / raw)
To: shejialuo
Cc: Patrick Steinhardt, git, Karthik Nayak, Eric Sunshine,
Justin Tobler
shejialuo <shejialuo@gmail.com> writes:
> I agree with you that it would be strange if we do not expose any
> interfaces for user who are adventurous. Actually we may simply add an
> option "--refs-experimental" or simply "--refs" to allow the users check
> ref consistency by using "git-fsck(1)".
>
> I guess the concern that Patrick cares about is that we ONLY make refs
> optional here, but do not provide options for other checks. It will be
> strange from this perspective.
I do not care about strange all that much. I however care about new
complexity in the code, complexity that is not taken advantage of
and is not exercised.
You said
> From the development of this series, we can know the main problem is
> that fsck error message is highly coupled with the object checks.
and even if it is true and we have problem in fsck code paths, we
cannot see if _your_ solution to that problem is a good one without
having the code that exercises your additional code.
But if "git refs verify" does exercise all the new code paths (and
the refactored code that existed before this series, sitting now in
different places), then I do not have to worry about it. My question
was primarily to extract "even though we do not wire this up to fsck,
we already have another code paths that uses all these changes" out
of you.
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-05 19:38 ` Junio C Hamano
@ 2024-08-06 0:42 ` shejialuo
2024-08-06 6:04 ` Patrick Steinhardt
2024-08-06 5:29 ` Patrick Steinhardt
1 sibling, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-06 0:42 UTC (permalink / raw)
To: Junio C Hamano
Cc: Patrick Steinhardt, git, Karthik Nayak, Eric Sunshine,
Justin Tobler
On Mon, Aug 05, 2024 at 12:38:43PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > I agree with you that it would be strange if we do not expose any
> > interfaces for user who are adventurous. Actually we may simply add an
> > option "--refs-experimental" or simply "--refs" to allow the users check
> > ref consistency by using "git-fsck(1)".
> >
> > I guess the concern that Patrick cares about is that we ONLY make refs
> > optional here, but do not provide options for other checks. It will be
> > strange from this perspective.
>
> I do not care about strange all that much. I however care about new
> complexity in the code, complexity that is not taken advantage of
> and is not exercised.
>
> You said
>
> > From the development of this series, we can know the main problem is
> > that fsck error message is highly coupled with the object checks.
>
> and even if it is true and we have problem in fsck code paths, we
> cannot see if _your_ solution to that problem is a good one without
> having the code that exercises your additional code.
>
> But if "git refs verify" does exercise all the new code paths (and
> the refactored code that existed before this series, sitting now in
> different places), then I do not have to worry about it. My question
> was primarily to extract "even though we do not wire this up to fsck,
> we already have another code paths that uses all these changes" out
> of you.
>
I understand what you mean here. I can say that "git refs verify" only
exercises a part of the new code paths. The main reason why this series
changes a lot of "fsck.[ch]" is that I want to expose the
"fsck_report_ref" interface to report refs-related errors. So I guess
this part should be covered.
In the current implementation, we change "fsck.[ch]" for the
following three things:
1. Refactor "report" to use "fsck_vreport"
2. Create "fsck_report_ref" for refs check.
3. Do some simple renames to distinguish between refs and objects.
We do cover the second case in "git refs verify". But sadly, the first
case and third case are covered in "git-fsck(1)". So, "git refs verify"
does not exercise the refactored code.
However, I am a little confused. Actually, in the implementation, refs
check and objects check are independent.
I think we should wire up "git refs verify" to "git-fsck(1)". Because we
have no way to exercise the above case 1 and 3. If we do not, we will
bring a lot of complexity here.
> Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-06 0:42 ` shejialuo
@ 2024-08-06 6:04 ` Patrick Steinhardt
2024-08-06 15:54 ` Junio C Hamano
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-06 6:04 UTC (permalink / raw)
To: shejialuo
Cc: Junio C Hamano, git, Karthik Nayak, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 4021 bytes --]
On Tue, Aug 06, 2024 at 08:42:23AM +0800, shejialuo wrote:
> On Mon, Aug 05, 2024 at 12:38:43PM -0700, Junio C Hamano wrote:
> > shejialuo <shejialuo@gmail.com> writes:
> >
> > > I agree with you that it would be strange if we do not expose any
> > > interfaces for user who are adventurous. Actually we may simply add an
> > > option "--refs-experimental" or simply "--refs" to allow the users check
> > > ref consistency by using "git-fsck(1)".
> > >
> > > I guess the concern that Patrick cares about is that we ONLY make refs
> > > optional here, but do not provide options for other checks. It will be
> > > strange from this perspective.
> >
> > I do not care about strange all that much. I however care about new
> > complexity in the code, complexity that is not taken advantage of
> > and is not exercised.
> >
> > You said
> >
> > > From the development of this series, we can know the main problem is
> > > that fsck error message is highly coupled with the object checks.
> >
> > and even if it is true and we have problem in fsck code paths, we
> > cannot see if _your_ solution to that problem is a good one without
> > having the code that exercises your additional code.
> >
> > But if "git refs verify" does exercise all the new code paths (and
> > the refactored code that existed before this series, sitting now in
> > different places), then I do not have to worry about it. My question
> > was primarily to extract "even though we do not wire this up to fsck,
> > we already have another code paths that uses all these changes" out
> > of you.
> >
>
> I understand what you mean here. I can say that "git refs verify" only
> exercises a part of the new code paths. The main reason why this series
> changes a lot of "fsck.[ch]" is that I want to expose the
> "fsck_report_ref" interface to report refs-related errors. So I guess
> this part should be covered.
>
> At the current implementation, we change the "fsck.[ch]" for the
> following three things:
>
> 1. Refactor "report" to use "fsck_vreport"
> 2. Create "fsck_report_ref" for refs check.
> 3. Do some simple renames to distinguish between refs and objects.
>
> We do cover the second case in "git refs verify". But sadly, the first
> case and third case are covered in "git-fsck(1)". So, "git refs verify"
> does not exercise the refactored code.
>
> However, I am a little confused. Actually, in the implementation, refs
> check and objects check are independent.
>
> I think we should wire up "git refs verify" to "git-fsck(1)". Because we
> have no way to exercise the above case 1 and 3. If we do not, we will
> bring a lot of complexity here.
I don't think that is necessary. Basically, what you are saying is that
we seem to be testing only the new refs-related reporting that you have
introduced via your refactorings, but not the preexisting objects
related functionality.
That isn't true though. The object-specific reporting functions that you
have refactored already had callers before, and it still has callers now
because you adapted those accordingly. You can assume that those callers
already had tests before your refactorings, and as no tests broke, you
can be reasonably sure that your refactorings are sound for the object
related code.
Furthermore, you do exercise the new ref-related parts via a couple of
tests that exercise `git refs verify`. Consequently, all parts of the
refactoring are covered by either old or new tests, and we should be
good here.
So even though we do not exercise the ref-related parts via git-fsck(1),
it is still subjected to tests. Eventually, when we start calling `git
refs verify` in git-fsck(1), we'd introduce more tests that verify that
the integration of those two commands works as expected, as well.
So, to summarize: the refactored functionality is both used and tested
and I think it's sensible to defer the integration of git-fsck(1) and
git-refs(1).
Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-06 6:04 ` Patrick Steinhardt
@ 2024-08-06 15:54 ` Junio C Hamano
2024-08-07 4:49 ` Patrick Steinhardt
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-08-06 15:54 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
Patrick Steinhardt <ps@pks.im> writes:
>> > But if "git refs verify" does exercise all the new code paths (and
>> > the refactored code that existed before this series, sitting now in
>> > different places), then I do not have to worry about it. My question
>> > was primarily to extract "even though we do not wire this up to fsck,
>> > we already have another code paths that uses all these changes" out
>> > of you.
>> ...
> So, to summarize: the refactored functionality is both used and tested
> and I think it's sensible to defer the integration of git-fsck(1) and
> git-refs(1).
After refactoring, existing functionality about objects are used, of
course, (there is no other code that does so), the refactoring lets
the code to learn to perform checks on references, and these new
checks are exercised by "git refs verify".
I took what shejialuo said that way, and that is fine by me when I
said the above ;-). So I think we all are on the same page?
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-06 15:54 ` Junio C Hamano
@ 2024-08-07 4:49 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-07 4:49 UTC (permalink / raw)
To: Junio C Hamano
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 1239 bytes --]
On Tue, Aug 06, 2024 at 08:54:34AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> >> > But if "git refs verify" does exercise all the new code paths (and
> >> > the refactored code that existed before this series, sitting now in
> >> > different places), then I do not have to worry about it. My question
> >> > was primarily to extract "even though we do not wire this up to fsck,
> >> > we already have another code paths that uses all these changes" out
> >> > of you.
> >> ...
> > So, to summarize: the refactored functionality is both used and tested
> > and I think it's sensible to defer the integration of git-fsck(1) and
> > git-refs(1).
>
> After refactoring, existing functionality about objects are used, of
> course, (there is no other code that does so), the refactoring lets
> the code to learn to perform checks on references, and these new
> checks are exercised by "git refs verify".
>
> I took what shejialuo said that way, and that is fine by me when I
> said the above ;-). So I think we all are on the same page?
Yup, we are. I was mostly aiming to reassure Jialuo, who was a bit
uncertain whether his answers had been sufficient or not.
Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process
2024-08-05 19:38 ` Junio C Hamano
2024-08-06 0:42 ` shejialuo
@ 2024-08-06 5:29 ` Patrick Steinhardt
1 sibling, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-06 5:29 UTC (permalink / raw)
To: Junio C Hamano
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 2236 bytes --]
On Mon, Aug 05, 2024 at 12:38:43PM -0700, Junio C Hamano wrote:
> shejialuo <shejialuo@gmail.com> writes:
>
> > I agree with you that it would be strange if we do not expose any
> > interfaces for user who are adventurous. Actually we may simply add an
> > option "--refs-experimental" or simply "--refs" to allow the users check
> > ref consistency by using "git-fsck(1)".
> >
> > I guess the concern that Patrick cares about is that we ONLY make refs
> > optional here, but do not provide options for other checks. It will be
> > strange from this perspective.
>
> I do not care about strange all that much. I however care about new
> complexity in the code, complexity that is not taken advantage of
> and is not exercised.
>
> You said
>
> > From the development of this series, we can know the main problem is
> > that fsck error message is highly coupled with the object checks.
>
> and even if it is true and we have problem in fsck code paths, we
> cannot see if _your_ solution to that problem is a good one without
> having the code that exercises your additional code.
>
> But if "git refs verify" does exercise all the new code paths (and
> the refactored code that existed before this series, sitting now in
> different places), then I do not have to worry about it. My question
> was primarily to extract "even though we do not wire this up to fsck,
> we already have another code paths that uses all these changes" out
> of you.
Yeah, there is quite some complexity introduced in fsck code, where most
of the complexity comes from making the reporting functions more generic.
And we do end up using that in this series even if we don't integrate
`git refs verify` with `git fsck`. It brings us the ability to configure
the checks performed by `git refs verify` in the same way like we can
configure all the other checks in `git fsck`, such that we can enable,
disable or change severity levels for each of the messages reported by
it.
I think that this is a sensible way to go about it, and it leaves us
with a more flexible and consistent error reporting infrastructure that
we can eventually also use for other commands that git-fsck(1) shells
out to.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 10/11] files-backend: add unified interface for refs scanning
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (8 preceding siblings ...)
2024-08-01 15:15 ` [GSoC][PATCH v14 09/11] builtin/fsck: add `git-refs verify` child process shejialuo
@ 2024-08-01 15:15 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-01 15:15 ` [GSoC][PATCH v14 11/11] fsck: add ref name check for files backend shejialuo
` (2 subsequent siblings)
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:15 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 ++
fsck.h | 1 +
refs/files-backend.c | 75 ++++++++++++++++++++++++++++++++++-
3 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..7c809fddf1 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefFiletype`::
+ (ERROR) A ref has a bad file type.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index d551a9fe86..af02174973 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_FILETYPE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 4630eb1f80..5574e78656 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,13 +3409,85 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
+ const char *gitdir,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fn)
+{
+ const char *gitdir = ref_store->gitdir;
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+ if (!iter) {
+ ret = error_errno(_("cannot open directory %s"), sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fn[i]; i++) {
+ if (fsck_refs_fn[i](o, gitdir, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ struct fsck_ref_report report = { .path = iter->basename };
+ if (fsck_report_ref(o, &report,
+ FSCK_MSG_BAD_REF_FILETYPE,
+ "unexpected file type"))
+ ret = -1;
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ files_fsck_refs_fn fsck_refs_fn[]= {
+ NULL,
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking references consistency");
+ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
+
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return files_fsck_refs(ref_store, o) |
+ refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
}
struct ref_storage_be refs_be_files = {
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 10/11] files-backend: add unified interface for refs scanning
2024-08-01 15:15 ` [GSoC][PATCH v14 10/11] files-backend: add unified interface for refs scanning shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 355 bytes --]
On Thu, Aug 01, 2024 at 11:15:14PM +0800, shejialuo wrote:
> +static int files_fsck_refs(struct ref_store *ref_store,
> + struct fsck_options *o)
> +{
> + files_fsck_refs_fn fsck_refs_fn[]= {
> + NULL,
> + };
> +
> + if (o->verbose)
> + fprintf_ln(stderr, "Checking references consistency");
This string should be marked for translation.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v14 11/11] fsck: add ref name check for files backend
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (9 preceding siblings ...)
2024-08-01 15:15 ` [GSoC][PATCH v14 10/11] files-backend: add unified interface for refs scanning shejialuo
@ 2024-08-01 15:15 ` shejialuo
2024-08-05 12:58 ` Patrick Steinhardt
2024-08-05 12:58 ` [GSoC][PATCH v14 00/11] ref consistency check infra setup Patrick Steinhardt
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
12 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-01 15:15 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The git-fsck(1) only implicitly checks the reference, it does not fully
check refs with bad format name such as standalone "@".
However, a file ending with ".lock" should not be marked as having a bad
ref name. It is expected that concurrent writers may have such lock files.
We currently ignore this situation. But for bare ".lock" file, we will
report it as error.
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use existing "check_refname_format" to explicitly
check the ref name. And add a new unit test to verify the functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 ++
fsck.h | 1 +
refs/files-backend.c | 32 ++++++++++++
t/t0602-reffiles-fsck.sh | 94 +++++++++++++++++++++++++++++++++++
4 files changed, 130 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index 7c809fddf1..68a2801f15 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -22,6 +22,9 @@
`badRefFiletype`::
(ERROR) A ref has a bad file type.
+`badRefName`::
+ (ERROR) A ref has an invalid format.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index af02174973..500b4c04d2 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,7 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 5574e78656..1186b6cbb1 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,37 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct fsck_options *o,
+ const char *gitdir UNUSED,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ size_t len = 0;
+ int ret = 0;
+
+ /*
+ * Ignore the files ending with ".lock" as they may be lock files
+ * However, do not allow bare ".lock" files.
+ */
+ if (strip_suffix(iter->basename, ".lock", &len) && (len != 0))
+ goto clean;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ struct fsck_ref_report report = { .path = NULL };
+
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ report.path = sb.buf;
+ ret = fsck_report_ref(o, &report,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+clean:
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3471,6 +3502,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
struct fsck_options *o)
{
files_fsck_refs_fn fsck_refs_fn[]= {
+ files_fsck_refs_name,
NULL,
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..2be28427ab
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,94 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+TEST_PASSES_SANITIZE_LEAK=true
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ cd repo &&
+
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2 &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err &&
+
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err &&
+
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ git fsck 2>err &&
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_must_be_empty err &&
+
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/.lock &&
+ test_must_fail git fsck 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/.lock &&
+ test_cmp expect err
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn fsck 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err &&
+
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore fsck 2>err &&
+ test_must_be_empty err
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 11/11] fsck: add ref name check for files backend
2024-08-01 15:15 ` [GSoC][PATCH v14 11/11] fsck: add ref name check for files backend shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 956 bytes --]
On Thu, Aug 01, 2024 at 11:15:24PM +0800, shejialuo wrote:
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index 5574e78656..1186b6cbb1 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -3419,6 +3419,37 @@ typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
> const char *refs_check_dir,
> struct dir_iterator *iter);
>
> +static int files_fsck_refs_name(struct fsck_options *o,
> + const char *gitdir UNUSED,
> + const char *refs_check_dir,
> + struct dir_iterator *iter)
> +{
> + struct strbuf sb = STRBUF_INIT;
> + size_t len = 0;
> + int ret = 0;
> +
> + /*
> + * Ignore the files ending with ".lock" as they may be lock files
> + * However, do not allow bare ".lock" files.
> + */
> + if (strip_suffix(iter->basename, ".lock", &len) && (len != 0))
> + goto clean;
Better:
if (ends_with(iter->basename, ".lock"))
goto cleanup;
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v14 00/11] ref consistency check infra setup
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (10 preceding siblings ...)
2024-08-01 15:15 ` [GSoC][PATCH v14 11/11] fsck: add ref name check for files backend shejialuo
@ 2024-08-05 12:58 ` Patrick Steinhardt
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
12 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-05 12:58 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 1856 bytes --]
On Thu, Aug 01, 2024 at 11:11:21PM +0800, shejialuo wrote:
> Hi All,
>
> This version does the following things:
>
> 1. By following the advice from Patrick, we should make the callback
> function be generic by adding only one "void * fsck_report" parameter.
> Thus the commit sequence will be much more clearer. And it wll be much
> easier for reviewers to review. And I have split the commit into more
> commits in this version.
> 2. Enhance the commit messages to provide more context about why we
> should do this.
> 3. Patrick advices that we should initialize the "fsck_options" member
> when parsing the options. However, because the original "strict" and
> "verbose" field are defined as the bit field, we cannot take the address
> of them. So I simply remove the bit field.
> 4. As Patrick said, ".lock" should not be reported as error. At current,
> ignore files ending with ".lock".
> 5. Add a fsck msg type called "badRefFiletype" which indicates that a
> ref has a bad file type when scanning the directory.
> 6. Junio advices instead of using "fsck_refs_fns", we should use the
> singular version "fsck_refs_fn", fix this.
> 7. Drop the last patch because in this series, we mainly focus on the
> infra, I will add a series later to add ref content check.
>
> However, there is one thing holding. Junio advices that I should
> follow the Patrick's change to change the prototype of "files_fsck_refs_fn"
>
> https://lore.kernel.org/git/fe0e2c3617c8040c632dbc3de613a1d22e8070f7.1722316795.git.ps@pks.im/
>
> However, at current, this patch is not in the cooking tree. I will
> handle this later.
This looks very good to me. I've got another set of smallish nits, but
all of those should be quick to address, I think. I think that the next
version should likely be ready to go.
Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 0/9] ref consistency check infra setup
2024-08-01 15:11 ` [GSoC][PATCH v14 00/11] " shejialuo
` (11 preceding siblings ...)
2024-08-05 12:58 ` [GSoC][PATCH v14 00/11] ref consistency check infra setup Patrick Steinhardt
@ 2024-08-05 16:43 ` shejialuo
2024-08-05 16:45 ` [GSoC][PATCH v15 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
` (11 more replies)
12 siblings, 12 replies; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:43 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All:
This version handles the following problems:
1. Patrick advises that I should not use `va_copy` in the changed
`report` function. That was indeed a mistake; this version avoids the
redundant copy of `ap`.
2. Patrick advises that I should rebase [v14 05/11] into [v14 04/11]. I
follow this advice in this version.
3. Patrick advises that we should put [v14 06/11] before we introduce
ref-related operations. This version reorders the commit sequence. It's
a minor change.
4. Patrick suggests that, for now, we should not hook the `git refs verify`
command into "git-fsck(1)". This is because the new check should be
disabled by default: many users run "git-fsck(1)" in their daily
workflow, so we should not be aggressive. However, providing a mechanism
to disable it in this series would add yet more complexity. So this
version drops patch [v14 09/11]. As a consequence, the test file now
uses the "git refs verify" command instead of the "git fsck"
command.
5. Patrick suggests that we should use `ends_with` instead of
`strip_suffix`; fixed.
There is another important problem this patch solves:
In v13, Junio suggested that `files_fsck_refs_fn` should be
adapted to Patrick's change. My earlier design was indeed a bad one: I
should always pass the `ref_store` structure. So I changed it to:
-typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
- const char *gitdir,
+typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
+ struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
`gitdir` can be obtained from the `ref_store` parameter. Passing the
`ref_store` parameter also provides extensibility here: if something else
changes, we merely need to change the "files_fsck_refs_fn" prototype.
Because I dropped one patch and rebased another, I provide the `interdiff`
to make reviewers' lives easier.
Due to the deadline of the GSoC, I will speed up the review feedback
process.
Thanks,
Jialuo
shejialuo (9):
fsck: rename "skiplist" to "skip_oids"
fsck: rename objects-related fsck error functions
fsck: make "fsck_error" callback generic
fsck: add a unified interface for reporting fsck messages
fsck: add refs report function
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
Documentation/fsck-msgids.txt | 6 ++
Documentation/git-refs.txt | 13 ++++
builtin/fsck.c | 17 +++--
builtin/mktag.c | 3 +-
builtin/refs.c | 34 +++++++++
fsck.c | 127 +++++++++++++++++++++++++++-------
fsck.h | 76 +++++++++++++++-----
object-file.c | 9 ++-
refs.c | 5 ++
refs.h | 8 +++
refs/debug.c | 11 +++
refs/files-backend.c | 116 ++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 +++
refs/refs-internal.h | 6 ++
refs/reftable-backend.c | 8 +++
t/t0602-reffiles-fsck.sh | 92 ++++++++++++++++++++++++
16 files changed, 480 insertions(+), 59 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Interdiff against v14:
diff --git a/builtin/fsck.c b/builtin/fsck.c
index b6ac878270..766bbd014d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -899,21 +899,6 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress)
return res;
}
-static void fsck_refs(void)
-{
- struct child_process refs_verify = CHILD_PROCESS_INIT;
- child_process_init(&refs_verify);
- refs_verify.git_cmd = 1;
- strvec_pushl(&refs_verify.args, "refs", "verify", NULL);
- if (verbose)
- strvec_push(&refs_verify.args, "--verbose");
- if (check_strict)
- strvec_push(&refs_verify.args, "--strict");
-
- if (run_command(&refs_verify))
- errors_found |= ERROR_REFS;
-}
-
static char const * const fsck_usage[] = {
N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
" [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1083,8 +1068,6 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
check_connectivity();
- fsck_refs();
-
if (the_repository->settings.core_commit_graph) {
struct child_process commit_graph_verify = CHILD_PROCESS_INIT;
diff --git a/fsck.c b/fsck.c
index d5e7c88eab..7eb5cdefdd 100644
--- a/fsck.c
+++ b/fsck.c
@@ -235,7 +235,6 @@ static int fsck_vreport(struct fsck_options *options,
void *fsck_report,
enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap_copy;
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -251,12 +250,10 @@ static int fsck_vreport(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_copy(ap_copy, ap);
- strbuf_vaddf(&sb, fmt, ap_copy);
+ strbuf_vaddf(&sb, fmt, ap);
result = options->error_func(options, fsck_report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
- va_end(ap);
return result;
}
@@ -1391,5 +1388,6 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
puts(oid_to_hex(report->oid));
return 0;
}
- return fsck_objects_error_function(o, fsck_report, msg_type,msg_id, message);
+ return fsck_objects_error_function(o, fsck_report,
+ msg_type, msg_id, message);
}
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 1186b6cbb1..6e6b47251d 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3414,26 +3414,25 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
* the whole directory. This function is used as the callback for each
* regular file or symlink in the directory.
*/
-typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
- const char *gitdir,
+typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
+ struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter);
-static int files_fsck_refs_name(struct fsck_options *o,
- const char *gitdir UNUSED,
+static int files_fsck_refs_name(struct ref_store *ref_store UNUSED,
+ struct fsck_options *o,
const char *refs_check_dir,
struct dir_iterator *iter)
{
struct strbuf sb = STRBUF_INIT;
- size_t len = 0;
int ret = 0;
/*
* Ignore the files ending with ".lock" as they may be lock files
* However, do not allow bare ".lock" files.
*/
- if (strip_suffix(iter->basename, ".lock", &len) && (len != 0))
- goto clean;
+ if (iter->basename[0] != '.' && ends_with(iter->basename, ".lock"))
+ goto cleanup;
if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
struct fsck_ref_report report = { .path = NULL };
@@ -3445,7 +3444,7 @@ static int files_fsck_refs_name(struct fsck_options *o,
"invalid refname format");
}
-clean:
+cleanup:
strbuf_release(&sb);
return ret;
}
@@ -3455,13 +3454,12 @@ static int files_fsck_refs_dir(struct ref_store *ref_store,
const char *refs_check_dir,
files_fsck_refs_fn *fsck_refs_fn)
{
- const char *gitdir = ref_store->gitdir;
struct strbuf sb = STRBUF_INIT;
struct dir_iterator *iter;
int iter_status;
int ret = 0;
- strbuf_addf(&sb, "%s/%s", gitdir, refs_check_dir);
+ strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir);
iter = dir_iterator_begin(sb.buf, 0);
if (!iter) {
@@ -3478,7 +3476,7 @@ static int files_fsck_refs_dir(struct ref_store *ref_store,
fprintf_ln(stderr, "Checking %s/%s",
refs_check_dir, iter->relative_path);
for (size_t i = 0; fsck_refs_fn[i]; i++) {
- if (fsck_refs_fn[i](o, gitdir, refs_check_dir, iter))
+ if (fsck_refs_fn[i](ref_store, o, refs_check_dir, iter))
ret = -1;
}
} else {
@@ -3507,7 +3505,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
};
if (o->verbose)
- fprintf_ln(stderr, "Checking references consistency");
+ fprintf_ln(stderr, _("Checking references consistency"));
return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
}
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
index 2be28427ab..71a4d1a5ae 100755
--- a/t/t0602-reffiles-fsck.sh
+++ b/t/t0602-reffiles-fsck.sh
@@ -26,7 +26,7 @@ test_expect_success 'ref name should be checked' '
git tag multi_hierarchy/tag-2 &&
cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
- test_must_fail git fsck 2>err &&
+ test_must_fail git refs verify 2>err &&
cat >expect <<-EOF &&
error: refs/heads/.branch-1: badRefName: invalid refname format
EOF
@@ -34,7 +34,7 @@ test_expect_success 'ref name should be checked' '
test_cmp expect err &&
cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
- test_must_fail git fsck 2>err &&
+ test_must_fail git refs verify 2>err &&
cat >expect <<-EOF &&
error: refs/heads/@: badRefName: invalid refname format
EOF
@@ -42,7 +42,7 @@ test_expect_success 'ref name should be checked' '
test_cmp expect err &&
cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
- test_must_fail git fsck 2>err &&
+ test_must_fail git refs verify 2>err &&
cat >expect <<-EOF &&
error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
EOF
@@ -50,12 +50,12 @@ test_expect_success 'ref name should be checked' '
test_cmp expect err &&
cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
- git fsck 2>err &&
+ git refs verify 2>err &&
rm $tag_dir_prefix/tag-1.lock &&
test_must_be_empty err &&
cp $tag_dir_prefix/tag-1 $tag_dir_prefix/.lock &&
- test_must_fail git fsck 2>err &&
+ test_must_fail git refs verify 2>err &&
cat >expect <<-EOF &&
error: refs/tags/.lock: badRefName: invalid refname format
EOF
@@ -76,18 +76,16 @@ test_expect_success 'ref name check should be adapted into fsck messages' '
git checkout -b branch-2 &&
git tag tag-2 &&
-
cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
- git -c fsck.badRefName=warn fsck 2>err &&
+ git -c fsck.badRefName=warn refs verify 2>err &&
cat >expect <<-EOF &&
warning: refs/heads/.branch-1: badRefName: invalid refname format
EOF
rm $branch_dir_prefix/.branch-1 &&
test_cmp expect err &&
-
cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
- git -c fsck.badRefName=ignore fsck 2>err &&
+ git -c fsck.badRefName=ignore refs verify 2>err &&
test_must_be_empty err
'
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 1/9] fsck: rename "skiplist" to "skip_oids"
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
@ 2024-08-05 16:45 ` shejialuo
2024-08-05 16:45 ` [GSoC][PATCH v15 2/9] fsck: rename objects-related fsck error functions shejialuo
` (10 subsequent siblings)
11 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:45 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce ref consistency checks, the "skiplist" name is too
general and would make callers think "skiplist" is related to both
refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 2/9] fsck: rename objects-related fsck error functions
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
2024-08-05 16:45 ` [GSoC][PATCH v15 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-08-05 16:45 ` shejialuo
2024-08-05 16:45 ` [GSoC][PATCH v15 3/9] fsck: make "fsck_error" callback generic shejialuo
` (9 subsequent siblings)
11 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:45 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of the objects-related fsck error functions are generic. That is
fine while there is only an object database check. However, we are going
to introduce a report function for the refs database check. To avoid
ambiguity, rename the objects-related fsck error functions to explicitly
indicate that they are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 14 +++++++-------
fsck.c | 17 +++++++++--------
fsck.h | 26 +++++++++++++-------------
3 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..6d86bbe1e9 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,12 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +938,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index 3f32441492..8347842cfb 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1200,7 +1200,7 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
+int fsck_objects_error_function(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type UNUSED,
enum fsck_msg_type msg_type,
@@ -1303,16 +1303,17 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..41ebebbb59 100644
--- a/fsck.h
+++ b/fsck.h
@@ -120,16 +120,16 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -150,7 +150,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -158,7 +158,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -166,7 +166,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 3/9] fsck: make "fsck_error" callback generic
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
2024-08-05 16:45 ` [GSoC][PATCH v15 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-08-05 16:45 ` [GSoC][PATCH v15 2/9] fsck: rename objects-related fsck error functions shejialuo
@ 2024-08-05 16:45 ` shejialuo
2024-08-07 8:03 ` Karthik Nayak
2024-08-05 16:45 ` [GSoC][PATCH v15 4/9] fsck: add a unified interface for reporting fsck messages shejialuo
` (8 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:45 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "fsck_error" callback is designed to report objects-related
error messages. It accepts two parameters, "oid" and "object_type", which
are not generic. In order to provide a unified callback which can report
on either objects or refs, remove the objects-related parameters and add
the generic parameter "void *fsck_report".
Create a new "fsck_object_report" structure which incorporates the
removed parameters "oid" and "object_type". Then change the
corresponding references to adapt to new "fsck_error" callback.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 7 +++++--
builtin/mktag.c | 3 +--
fsck.c | 26 ++++++++++++++++----------
fsck.h | 17 ++++++++++++-----
object-file.c | 9 ++++-----
5 files changed, 38 insertions(+), 24 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 6d86bbe1e9..766bbd014d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -90,12 +90,15 @@ static int objerror(struct object *obj, const char *err)
}
static int fsck_objects_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
+ enum object_type object_type = report->object_type;
+
switch (msg_type) {
case FSCK_WARN:
/* TRANSLATORS: e.g. warning in tree 01bfda: <more explanation> */
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..c6b644219f 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -18,8 +18,7 @@ static int option_strict = 1;
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
+ void *fsck_report UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 8347842cfb..cca6ae144f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -232,6 +232,10 @@ static int report(struct fsck_options *options,
enum fsck_msg_id msg_id, const char *fmt, ...)
{
va_list ap;
+ struct fsck_object_report report = {
+ .oid = oid,
+ .object_type = object_type
+ };
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -252,7 +256,7 @@ static int report(struct fsck_options *options,
va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ result = options->error_func(options, &report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -1201,12 +1205,14 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
}
int fsck_objects_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
+
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
return 0;
@@ -1304,16 +1310,16 @@ int git_fsck_config(const char *var, const char *value,
*/
int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
- puts(oid_to_hex(oid));
+ struct fsck_object_report *report = fsck_report;
+ puts(oid_to_hex(report->oid));
return 0;
}
- return fsck_objects_error_function(o, oid, object_type,
+ return fsck_objects_error_function(o, fsck_report,
msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 41ebebbb59..3b80d02506 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,23 +114,30 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * Callback for reporting errors either for objects or refs. The "fsck_report"
+ * is a generic pointer that can be used to pass any information.
+ */
typedef int (*fsck_error)(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_objects_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+struct fsck_object_report {
+ const struct object_id *oid;
+ enum object_type object_type;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
diff --git a/object-file.c b/object-file.c
index 065103be3e..05ac6ebed6 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,10 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ void *fsck_report UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 3/9] fsck: make "fsck_error" callback generic
2024-08-05 16:45 ` [GSoC][PATCH v15 3/9] fsck: make "fsck_error" callback generic shejialuo
@ 2024-08-07 8:03 ` Karthik Nayak
0 siblings, 0 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-08-07 8:03 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 802 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> The "fsck_error" callback is designed to report the objects-related
> error messages. It accepts two parameter "oid" and "object_type" which
> is not generic. In order to provide a unified callback which can report
> either objects or refs, remove the objects-related parameters and add
> the generic parameter "void *fsck_report".
>
> Create a new "fsck_object_report" structure which incorporates the
> removed parameters "oid" and "object_type". Then change the
> corresponding references to adapt to new "fsck_error" callback.
>
Since my last review, we've changed the function to accept a
'void *' parameter. This makes sense: this way the refs error function
and the objects error function each receive only the data they care
about.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 4/9] fsck: add a unified interface for reporting fsck messages
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (2 preceding siblings ...)
2024-08-05 16:45 ` [GSoC][PATCH v15 3/9] fsck: make "fsck_error" callback generic shejialuo
@ 2024-08-05 16:45 ` shejialuo
2024-08-07 8:05 ` Karthik Nayak
2024-08-05 16:45 ` [GSoC][PATCH v15 5/9] fsck: add refs report function shejialuo
` (7 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:45 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking error
type and calling the callback "error_func" to report the message. Both
refs and objects need to check the error type of the current fsck
message. In order to extract this common behavior, create a new function
"fsck_vreport". Instead of using "...", provide "va_list" to allow more
flexibility.
Instead of changing the "report" prototype to align with the
"fsck_vreport" function, we leave the "report" prototype unchanged
because there are nearly 62 references to the "report" function.
Simply change the "report" function to use "fsck_vreport" to
report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 44 ++++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/fsck.c b/fsck.c
index cca6ae144f..3614aa56a3 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,16 +226,15 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide the common functionality for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+ void *fsck_report,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
- struct fsck_object_report report = {
- .oid = oid,
- .object_type = object_type
- };
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -243,9 +242,6 @@ static int report(struct fsck_options *options,
if (msg_type == FSCK_IGNORE)
return 0;
- if (object_on_skiplist(options, oid))
- return 0;
-
if (msg_type == FSCK_FATAL)
msg_type = FSCK_ERROR;
else if (msg_type == FSCK_INFO)
@@ -254,11 +250,31 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, &report,
+ result = options->error_func(options, fsck_report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
+
+ return result;
+}
+
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ struct fsck_object_report report = {
+ .oid = oid,
+ .object_type = object_type
+ };
+ int result;
+
+ if (object_on_skiplist(options, oid))
+ return 0;
+
+ va_start(ap, fmt);
+ result = fsck_vreport(options, &report, msg_id, fmt, ap);
va_end(ap);
return result;
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 5/9] fsck: add refs report function
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (3 preceding siblings ...)
2024-08-05 16:45 ` [GSoC][PATCH v15 4/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-08-05 16:45 ` shejialuo
2024-08-06 7:32 ` Patrick Steinhardt
2024-08-05 16:46 ` [GSoC][PATCH v15 6/9] refs: set up ref consistency check infrastructure shejialuo
` (6 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:45 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new struct "fsck_ref_report" to contain the information we
need when reporting refs-related messages.
With the new "fsck_vreport" function, add a new function
"fsck_report_ref" to report refs-related fsck error messages. Unlike the
"report" function, which takes each field as a separate parameter, we
simply pass "struct fsck_ref_report *report" as the parameter. This is
because at present we don't know exactly how many fields we need. By
passing this struct, we don't need to change the function prototype when
we want to add more information to "fsck_ref_report".
We have introduced "fsck_report_ref" function to report the error
message for refs. We still need to add the corresponding callback
function. Create refs-specific "error_func" callback
"fsck_refs_error_function".
Last, add "FSCK_REFS_OPTIONS_DEFAULT" macro to create default options
when checking ref consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 41 +++++++++++++++++++++++++++++++++++++++++
fsck.h | 25 +++++++++++++++++++++++++
2 files changed, 66 insertions(+)
diff --git a/fsck.c b/fsck.c
index 3614aa56a3..38554b626e 100644
--- a/fsck.c
+++ b/fsck.c
@@ -280,6 +280,19 @@ static int report(struct fsck_options *options,
return result;
}
+int fsck_report_ref(struct fsck_options *options,
+ struct fsck_ref_report *report,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_vreport(options, report, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1237,6 +1250,34 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct fsck_ref_report *report = fsck_report;
+
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ strbuf_addstr(&sb, report->path);
+
+ if (report->oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid));
+ else if (report->referent)
+ strbuf_addf(&sb, " -> (%s)", report->referent);
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 3b80d02506..2002590f60 100644
--- a/fsck.h
+++ b/fsck.h
@@ -133,11 +133,23 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
+
struct fsck_object_report {
const struct object_id *oid;
enum object_type object_type;
};
+struct fsck_ref_report {
+ const char *path;
+ const struct object_id *oid;
+ const char *referent;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
@@ -175,6 +187,9 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
@@ -216,6 +231,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 4, 5)))
+int fsck_report_ref(struct fsck_options *options,
+ struct fsck_ref_report *report,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
+
/*
* Subsystem for storing human-readable names for each object.
*
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 5/9] fsck: add refs report function
2024-08-05 16:45 ` [GSoC][PATCH v15 5/9] fsck: add refs report function shejialuo
@ 2024-08-06 7:32 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-06 7:32 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 989 bytes --]
On Tue, Aug 06, 2024 at 12:45:57AM +0800, shejialuo wrote:
> @@ -1237,6 +1250,34 @@ int fsck_objects_error_function(struct fsck_options *o,
> return 1;
> }
>
> +int fsck_refs_error_function(struct fsck_options *options UNUSED,
> + void *fsck_report,
> + enum fsck_msg_type msg_type,
> + enum fsck_msg_id msg_id UNUSED,
> + const char *message)
> +{
> + struct fsck_ref_report *report = fsck_report;
> +
Nit: there's an unneeded empty newline here.
> + struct strbuf sb = STRBUF_INIT;
> + int ret = 0;
> +
> + strbuf_addstr(&sb, report->path);
> +
> + if (report->oid)
> + strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid));
> + else if (report->referent)
> + strbuf_addf(&sb, " -> (%s)", report->referent);
> +
> + if (msg_type == FSCK_WARN)
> + warning("%s: %s", sb.buf, message);
> + else
> + ret = error("%s: %s", sb.buf, message);
> +
> + strbuf_release(&sb);
> + return ret;
> +
Here, as well.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 6/9] refs: set up ref consistency check infrastructure
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (4 preceding siblings ...)
2024-08-05 16:45 ` [GSoC][PATCH v15 5/9] fsck: add refs report function shejialuo
@ 2024-08-05 16:46 ` shejialuo
2024-08-05 16:46 ` [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand shejialuo
` (5 subsequent siblings)
11 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:46 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "struct ref_store" is the base class which contains the "be" pointer
which provides backend-specific functions whose interfaces are defined
in the "ref_storage_be". We could reuse this polymorphism to define only
one interface. For every backend, we need to provide its own function
pointer.
The interfaces defined in the `ref_storage_be` are carefully structured
semantically. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". It does not fit into any of
the five categories above, so explicitly add a blank line to set it
apart from the others.
Last, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 13 ++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index 915aeb4d1d..6f642dc681 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index b3e39bc257..405073621a 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..4630eb1f80 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,15 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3443,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (5 preceding siblings ...)
2024-08-05 16:46 ` [GSoC][PATCH v15 6/9] refs: set up ref consistency check infrastructure shejialuo
@ 2024-08-05 16:46 ` shejialuo
2024-08-06 7:32 ` Patrick Steinhardt
2024-08-05 16:46 ` [GSoC][PATCH v15 8/9] files-backend: add unified interface for refs scanning shejialuo
` (4 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:46 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency and also this subcommand will
be used as the entry point of checking refs for "git-fsck(1)".
Add "verbose" field into "fsck_options" to indicate whether we should
print verbose messages when checking refs and objects consistency.
Remove the bit-field from the "strict" field, because we cannot take
the address of a bit-field, which makes it awkward to set member
variables when parsing the command line options.
"git-fsck(1)" declares its "fsck_options" variables with the "static"
storage class, which avoids complaints from the leak checker. However,
in "git refs verify", we need to free the memory manually. Thus add the
"fsck_options_clear" function in "fsck.c" to release the memory held by
the options.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++++
builtin/refs.c | 34 ++++++++++++++++++++++++++++++++++
fsck.c | 11 +++++++++++
fsck.h | 8 +++++++-
4 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..ce31f93061 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter error checking. This will cause warnings to be
+ reported as errors. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..131f98be98 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,44 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ struct option options[] = {
+ OPT_BOOL(0, "verbose", &fsck_refs_options.verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &fsck_refs_options.strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ fsck_options_clear(&fsck_refs_options);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.c b/fsck.c
index 38554b626e..7eb5cdefdd 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1333,6 +1333,17 @@ int fsck_finish(struct fsck_options *options)
return ret;
}
+void fsck_options_clear(struct fsck_options *options)
+{
+ free(options->msg_type);
+ oidset_clear(&options->skip_oids);
+ oidset_clear(&options->gitmodules_found);
+ oidset_clear(&options->gitmodules_done);
+ oidset_clear(&options->gitattributes_found);
+ oidset_clear(&options->gitattributes_done);
+ kh_clear_oid_map(options->object_names);
+}
+
int git_fsck_config(const char *var, const char *value,
const struct config_context *ctx, void *cb)
{
diff --git a/fsck.h b/fsck.h
index 2002590f60..d551a9fe86 100644
--- a/fsck.h
+++ b/fsck.h
@@ -153,7 +153,8 @@ struct fsck_ref_report {
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
- unsigned strict:1;
+ unsigned strict;
+ unsigned verbose;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
@@ -231,6 +232,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Clear the fsck_options struct, freeing any allocated memory.
+ */
+void fsck_options_clear(struct fsck_options *options);
+
/*
* Report an error or warning for refs.
*/
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-05 16:46 ` [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand shejialuo
@ 2024-08-06 7:32 ` Patrick Steinhardt
2024-08-06 16:15 ` Junio C Hamano
0 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-06 7:32 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 1330 bytes --]
On Tue, Aug 06, 2024 at 12:46:15AM +0800, shejialuo wrote:
> @@ -58,15 +63,44 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
> return err;
> }
>
> +static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
> +{
> + struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
> + const char * const verify_usage[] = {
> + REFS_VERIFY_USAGE,
> + NULL,
> + };
> + struct option options[] = {
> + OPT_BOOL(0, "verbose", &fsck_refs_options.verbose, N_("be verbose")),
> + OPT_BOOL(0, "strict", &fsck_refs_options.strict, N_("enable strict checking")),
> + OPT_END(),
> + };
> + int ret;
> +
> + argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> + if (argc)
> + usage(_("'git refs verify' takes no arguments"));
Junio has posted a patch series [1] where he wants to get rid of
messages that simply say "no arguments" or "too many arguments". I guess
we can play nice and also move into the same direction here, where we
instead tell the user which argument we didn't expect.
So I'd propose to make this:
argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
if (argc)
usage(_("unknown argument: '%s'", argv[0]));
[1]: <20240806003539.3292562-1-gitster@pobox.com>
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-06 7:32 ` Patrick Steinhardt
@ 2024-08-06 16:15 ` Junio C Hamano
2024-08-07 5:55 ` Patrick Steinhardt
0 siblings, 1 reply; 282+ messages in thread
From: Junio C Hamano @ 2024-08-06 16:15 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
Patrick Steinhardt <ps@pks.im> writes:
>> + if (argc)
>> + usage(_("'git refs verify' takes no arguments"));
>
> Junio has posted a patch series [1] where he wants to get rid of
> messages that simply say "no arguments" or "too many arguments".
> ...
> So I'd propose to make this:
>
> argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> if (argc)
> usage(_("unknown argument: '%s'", argv[0]));
I probably should have said that I am fully behind the intent
against "too many arguments", but I am not 100% behind the
particular messaging used in the patch series I sent out.
One potential complaint I expected to hear, for example, was that "a
is unknown" given when you said "git cmd a a a a a" is not all that
clear ;-). To alleviate, you would have to say "git cmd takes only
2 arguments" if 'a' you are complaining about is the third one.
Also, many people would consider that "unexpected argument" is
better than "unknown argument".
I personally think the message above is absolutely clear and good.
You say that 'git refs verify' takes no arguments, and for somebody
who said "git refs verify a b c d e", there is no doubt that all of
these a b c d e are unwanted. And there is no room to misinterpret
the message as "'git refs' is ok but 'git refs verify' is already
unwelcome with extra argument", either [*].
In short, I think the message in the patch here is good, and it is
the other "war on 'too many arguments'" series whose messages need
to be thought further.
[Footnote]
* ... which was the problem I was trying to address in the current
message "too many arguments" that does not even say which early
part of the command line we consider is "command" that was given
"arguments"---to uninitiated who said "git refs verify foo", it
is unclear if that's "git refs" command whose first argument is
"verify", "git" command whose first two arguments are "refs
verify", etc.
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-06 16:15 ` Junio C Hamano
@ 2024-08-07 5:55 ` Patrick Steinhardt
2024-08-07 12:50 ` shejialuo
2024-08-07 15:55 ` Junio C Hamano
0 siblings, 2 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-07 5:55 UTC (permalink / raw)
To: Junio C Hamano
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 2127 bytes --]
On Tue, Aug 06, 2024 at 09:15:28AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> >> + if (argc)
> >> + usage(_("'git refs verify' takes no arguments"));
> >
> > Junio has posted a patch series [1] where he wants to get rid of
> > messages that simply say "no arguments" or "too many arguments".
> > ...
> > So I'd propose to make this:
> >
> > argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
> > if (argc)
> > usage(_("unknown argument: '%s'", argv[0]));
>
> I probably should have said that I am fully behind the intent
> against "too many arguments", but I am not 100% behind the
> particular messaging used in the patch series I sent out.
>
> One potential complaint I expected to hear, for example, was that "a
> is unknown" given when you said "git cmd a a a a a" is not all that
> clear ;-). To alleviate, you would have to say "git cmd takes only
> 2 arguments" if 'a' you are complaining about is the third one.
>
> Also, many people would consider that "unexpected argument" is
> better than "unknown argument".
>
> I personally think the message above is absolutely clear and good.
>
> You say that 'git refs verify' takes no arguments, and for somebody
> who said "git refs verify a b c d e", there is no doubt that all of
> these a b c d e are unwanted. And there is no room to misinterpret
> the message as "'git refs' is ok but 'git refs verify' is already
> unwelcome with extra argument", either [*].
>
> In short, I think the message in the patch here is good, and it is
> the other "war on 'too many arguments'" series whose messages need
> to be thought further.
Just to clarify: with "the patch" you probably refer to the current
version that Jialuo has, right? In other words, keep the current version
that he has and adapt the message in the future, when we have decided
what to do about those "too many arguments" messages?
If so, then the only two I had were some spurious newlines. I'm not sure
whether these really would be worth rerolling the whole patch series.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-07 5:55 ` Patrick Steinhardt
@ 2024-08-07 12:50 ` shejialuo
2024-08-08 10:22 ` Karthik Nayak
2024-08-07 15:55 ` Junio C Hamano
1 sibling, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-07 12:50 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: Junio C Hamano, git, Karthik Nayak, Eric Sunshine, Justin Tobler
> If so, then the only two I had were some spurious newlines. I'm not sure
> whether these really would be worth rerolling the whole patch series.
>
Karthik has given some reviews. I guess I need to reroll because there
is one typo error in commit message. It's important to make this fixed.
> Patrick
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-07 12:50 ` shejialuo
@ 2024-08-08 10:22 ` Karthik Nayak
0 siblings, 0 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-08-08 10:22 UTC (permalink / raw)
To: shejialuo, Patrick Steinhardt
Cc: Junio C Hamano, git, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 478 bytes --]
shejialuo <shejialuo@gmail.com> writes:
>> If so, then the only two I had were some spurious newlines. I'm not sure
>> whether these really would be worth rerolling the whole patch series.
>>
>
> Karthik has given some reviews. I guess I need to reroll because there
> is one typo error in commit message. It's important to make this fixed.
>
>> Patrick
I would say, my nits by themselves don't require a re-roll, but if
you're re-rolling, it'd be nice to resolve them too :)
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand
2024-08-07 5:55 ` Patrick Steinhardt
2024-08-07 12:50 ` shejialuo
@ 2024-08-07 15:55 ` Junio C Hamano
1 sibling, 0 replies; 282+ messages in thread
From: Junio C Hamano @ 2024-08-07 15:55 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
Patrick Steinhardt <ps@pks.im> writes:
> On Tue, Aug 06, 2024 at 09:15:28AM -0700, Junio C Hamano wrote:
>> Patrick Steinhardt <ps@pks.im> writes:
>>
>> >> + if (argc)
>> >> + usage(_("'git refs verify' takes no arguments"));
>> >
>> > Junio has posted a patch series [1] where he wants to get rid of
>> > messages that simply say "no arguments" or "too many arguments".
>> > ...
>> > So I'd propose to make this:
>> >
>> > argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
>> > if (argc)
>> > usage(_("unknown argument: '%s'", argv[0]));
>>
>> I probably should have said that I am fully behind the intent
>> against "too many arguments", but I am not 100% behind the
>> particular messaging used in the patch series I sent out.
>>
>> One potential complaint I expected to hear, for example, was that "a
>> is unknown" given when you said "git cmd a a a a a" is not all that
>> clear ;-). To alleviate, you would have to say "git cmd takes only
>> 2 arguments" if 'a' you are complaining about is the third one.
>>
>> Also, many people would consider that "unexpected argument" is
>> better than "unknown argument".
>>
>> I personally think the message above is absolutely clear and good.
>>
>> You say that 'git refs verify' takes no arguments, and for somebody
>> who said "git refs verify a b c d e", there is no doubt that all of
>> these a b c d e are unwanted. And there is no room to misinterpret
>> the message as "'git refs' is ok but 'git refs verify' is already
>> unwelcome with extra argument", either [*].
>>
>> In short, I think the message in the patch here is good, and it is
>> the other "war on 'too many arguments'" series whose messages need
>> to be thought further.
>
> Just to clarify: with "the patch" you probably refer to the current
> version that Jialuo has, right? In other words, keep the current version
> that he has and adapt the message in the future, when we have decided
> what to do about those "too many arguments" messages?
I meant that I think (1) that "'git refs verify' takes no arguments"
is a good message, and (2) that there is no further change needed to
the patch that started this review thread, regardless of how we want
to deal with "too many arguments" messages.
> If so, then the only two I had were some spurious newlines. I'm not sure
> whether these really would be worth rerolling the whole patch series.
Yup, those blank lines were annoying while scanning the patches, but
they alone would not be something that makes a reroll _required_. A
reroll that clearly shows that the incremental change since the last
round is only these blank line changes is not too much to process,
so "not worth the reviewer time" is not a huge reason to avoid it,
either. I'd see that it is up to how perfectionist the patch
submitter wants to be ;-)
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 8/9] files-backend: add unified interface for refs scanning
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (6 preceding siblings ...)
2024-08-05 16:46 ` [GSoC][PATCH v15 7/9] builtin/refs: add verify subcommand shejialuo
@ 2024-08-05 16:46 ` shejialuo
2024-08-06 7:33 ` Patrick Steinhardt
2024-08-05 16:46 ` [GSoC][PATCH v15 9/9] fsck: add ref name check for files backend shejialuo
` (3 subsequent siblings)
11 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:46 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for the
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 ++
fsck.h | 1 +
refs/files-backend.c | 74 ++++++++++++++++++++++++++++++++++-
3 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..7c809fddf1 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefFiletype`::
+ (ERROR) A ref has a bad file type.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index d551a9fe86..af02174973 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_FILETYPE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 4630eb1f80..f337356860 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,13 +3409,84 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fn)
+{
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+ if (!iter) {
+ ret = error_errno(_("cannot open directory %s"), sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fn[i]; i++) {
+ if (fsck_refs_fn[i](ref_store, o, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ struct fsck_ref_report report = { .path = iter->basename };
+ if (fsck_report_ref(o, &report,
+ FSCK_MSG_BAD_REF_FILETYPE,
+ "unexpected file type"))
+ ret = -1;
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ files_fsck_refs_fn fsck_refs_fn[]= {
+ NULL,
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, _("Checking references consistency"));
+ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
+
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return files_fsck_refs(ref_store, o) |
+ refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
}
struct ref_storage_be refs_be_files = {
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 8/9] files-backend: add unified interface for refs scanning
2024-08-05 16:46 ` [GSoC][PATCH v15 8/9] files-backend: add unified interface for refs scanning shejialuo
@ 2024-08-06 7:33 ` Patrick Steinhardt
0 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-06 7:33 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 429 bytes --]
On Tue, Aug 06, 2024 at 12:46:24AM +0800, shejialuo wrote:
[snip]
> +static int files_fsck_refs(struct ref_store *ref_store,
> + struct fsck_options *o)
> +{
> + files_fsck_refs_fn fsck_refs_fn[]= {
> + NULL,
> + };
> +
> + if (o->verbose)
> + fprintf_ln(stderr, _("Checking references consistency"));
> + return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
> +
Nit: another empty newline.
> +}
> +
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v15 9/9] fsck: add ref name check for files backend
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (7 preceding siblings ...)
2024-08-05 16:46 ` [GSoC][PATCH v15 8/9] files-backend: add unified interface for refs scanning shejialuo
@ 2024-08-05 16:46 ` shejialuo
2024-08-06 7:32 ` [GSoC][PATCH v15 0/9] ref consistency check infra setup Patrick Steinhardt
` (2 subsequent siblings)
11 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-05 16:46 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully
check refs with badly formatted names such as a standalone "@".
However, a file ending with ".lock" should not be marked as having a bad
ref name. It is expected that concurrent writers may have such lock files.
We currently ignore this situation. But for a bare ".lock" file, we will
report it as an error.
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to explicitly
check the ref name. And add a new unit test to verify the functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/fsck-msgids.txt | 3 ++
fsck.h | 1 +
refs/files-backend.c | 31 ++++++++++++
t/t0602-reffiles-fsck.sh | 92 +++++++++++++++++++++++++++++++++++
4 files changed, 127 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index 7c809fddf1..68a2801f15 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -22,6 +22,9 @@
`badRefFiletype`::
(ERROR) A ref has a bad file type.
+`badRefName`::
+ (ERROR) A ref has an invalid format.
+
`badTagName`::
(INFO) A tag has an invalid format.
diff --git a/fsck.h b/fsck.h
index af02174973..500b4c04d2 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,7 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
diff --git a/refs/files-backend.c b/refs/files-backend.c
index f337356860..6e6b47251d 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,36 @@ typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct ref_store *ref_store UNUSED,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ /*
+ * Ignore the files ending with ".lock" as they may be lock files
+ * However, do not allow bare ".lock" files.
+ */
+ if (iter->basename[0] != '.' && ends_with(iter->basename, ".lock"))
+ goto cleanup;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ struct fsck_ref_report report = { .path = NULL };
+
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ report.path = sb.buf;
+ ret = fsck_report_ref(o, &report,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+cleanup:
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3500,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
struct fsck_options *o)
{
files_fsck_refs_fn fsck_refs_fn[]= {
+ files_fsck_refs_name,
NULL,
};
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..71a4d1a5ae
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+TEST_PASSES_SANITIZE_LEAK=true
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ cd repo &&
+
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2 &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err &&
+
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err &&
+
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ git refs verify 2>err &&
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_must_be_empty err &&
+
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/.lock &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/.lock &&
+ test_cmp expect err
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn refs verify 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore refs verify 2>err &&
+ test_must_be_empty err
+'
+
+test_done
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 0/9] ref consistency check infra setup
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (8 preceding siblings ...)
2024-08-05 16:46 ` [GSoC][PATCH v15 9/9] fsck: add ref name check for files backend shejialuo
@ 2024-08-06 7:32 ` Patrick Steinhardt
2024-08-07 9:29 ` Karthik Nayak
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
11 siblings, 0 replies; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-06 7:32 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 2489 bytes --]
On Tue, Aug 06, 2024 at 12:43:22AM +0800, shejialuo wrote:
> Hi All:
>
> This version handles the following problems:
>
> 1. Patrick advices that I should not use `va_copy` in the changed
> `report` function. Actually this is a mistake, this version avoids
> redundant `ap` copy.
> 2. Patrick advices I should rebase [v14 05/11] into [v14 04/11]. I
> follow this advice in this version.
> 3. Patrick advices that we should put [v14 06/11] before we introduce
> ref-related operations. This version reorders the commit sequence. It's
> a minor change.
> 4. Patrick suggests at current we should not add `git refs verify`
> command into "git-fsck(1)". This is because we should disable this new
> check by default for the users. Many users use "git-fsck(1)" in their daily
> workflow. We should not be aggressive. However, if we provide this
> mechanism in this series, we will again make more complexity. So this
> version drop patch [v14 09/11]. Also because of dropping, change the
> test file to use "git refs verify" command instead of "git fsck"
> command.
> 5. Patrick suggests that we should use `ends_with` instead of
> `strip_suffix`, fix.
>
> There is another important problem this patch solves:
>
> At v13, Junio has suggested that the `files_fsck_refs_fn` should be
> adapted to Patrick's change. Actually, I made a bad design before. I
> should always pass the `ref_store` structure. So I change it to
>
> -typedef int (*files_fsck_refs_fn)(struct fsck_options *o,
> - const char *gitdir,
> +typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
> + struct fsck_options *o,
> const char *refs_check_dir,
> struct dir_iterator *iter);
>
> `gitdir` could be got by using `ref_store` parameter. By using
> `ref_store` parameter, we provide extensibility here. If something else
> change, we merely need to change "files_fsck_refs_fn" prototype.
>
> Because I drop one patch and rebase one patch. I provide the `interdiff`
> for reviewers to make the life easier.
>
> Due to the deadline of the GSoC, I will speed up the review feedback
> process.
I've got another small set of nits, almost not worth addressing. I was a
bit torn whether to send them or not as the series is in a good shape
already, in my opinion. But let's maybe wait one or two more days for
additional feedback, and then (hopefully) reroll this a final time.
Thanks for all your work!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v15 0/9] ref consistency check infra setup
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (9 preceding siblings ...)
2024-08-06 7:32 ` [GSoC][PATCH v15 0/9] ref consistency check infra setup Patrick Steinhardt
@ 2024-08-07 9:29 ` Karthik Nayak
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
11 siblings, 0 replies; 282+ messages in thread
From: Karthik Nayak @ 2024-08-07 9:29 UTC (permalink / raw)
To: shejialuo, git
Cc: Patrick Steinhardt, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 1352 bytes --]
shejialuo <shejialuo@gmail.com> writes:
> Hi All:
>
> This version handles the following problems:
>
> 1. Patrick advices that I should not use `va_copy` in the changed
> `report` function. Actually this is a mistake, this version avoids
> redundant `ap` copy.
> 2. Patrick advices I should rebase [v14 05/11] into [v14 04/11]. I
> follow this advice in this version.
> 3. Patrick advices that we should put [v14 06/11] before we introduce
> ref-related operations. This version reorders the commit sequence. It's
> a minor change.
> 4. Patrick suggests at current we should not add `git refs verify`
> command into "git-fsck(1)". This is because we should disable this new
> check by default for the users. Many users use "git-fsck(1)" in their daily
> workflow. We should not be aggressive. However, if we provide this
> mechanism in this series, we will again make more complexity. So this
> version drop patch [v14 09/11]. Also because of dropping, change the
> test file to use "git refs verify" command instead of "git fsck"
> command.
This is the biggest change in this version and it makes sense. It can
still be added later on, but for now users can use this via `git refs
verify`.
> 5. Patrick suggests that we should use `ends_with` instead of
> `strip_suffix`, fix.
>
Apart from the minor nits, I think this version looks good.
Thanks!
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 0/9] ref consistency check infra setup
2024-08-05 16:43 ` [GSoC][PATCH v15 0/9] " shejialuo
` (10 preceding siblings ...)
2024-08-07 9:29 ` Karthik Nayak
@ 2024-08-08 11:21 ` shejialuo
2024-08-08 11:24 ` [GSoC][PATCH v16 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
` (9 more replies)
11 siblings, 10 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:21 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Hi All, this version handles some minor changes:
1. Remove redundant newlines.
2. Fix typo in commit message.
Finally, I want to thank every reviewer. As we can see, this series started
on 5.27, so it has been a very long journey. I have learned a lot.
Thanks
Jialuo
shejialuo (9):
fsck: rename "skiplist" to "skip_oids"
fsck: rename objects-related fsck error functions
fsck: make "fsck_error" callback generic
fsck: add a unified interface for reporting fsck messages
fsck: add refs report function
refs: set up ref consistency check infrastructure
builtin/refs: add verify subcommand
files-backend: add unified interface for refs scanning
fsck: add ref name check for files backend
Documentation/fsck-msgids.txt | 6 ++
Documentation/git-refs.txt | 13 ++++
builtin/fsck.c | 17 +++--
builtin/mktag.c | 3 +-
builtin/refs.c | 34 +++++++++
fsck.c | 125 +++++++++++++++++++++++++++-------
fsck.h | 76 ++++++++++++++++-----
object-file.c | 9 ++-
refs.c | 5 ++
refs.h | 8 +++
refs/debug.c | 11 +++
refs/files-backend.c | 115 ++++++++++++++++++++++++++++++-
refs/packed-backend.c | 8 +++
refs/refs-internal.h | 6 ++
refs/reftable-backend.c | 8 +++
t/t0602-reffiles-fsck.sh | 92 +++++++++++++++++++++++++
16 files changed, 477 insertions(+), 59 deletions(-)
create mode 100755 t/t0602-reffiles-fsck.sh
Range-diff against v15:
1: 9aeaa3211c = 1: 9aeaa3211c fsck: rename "skiplist" to "skip_oids"
2: 7511340a21 = 2: 7511340a21 fsck: rename objects-related fsck error functions
3: ee971d17f4 = 3: ee971d17f4 fsck: make "fsck_error" callback generic
4: 59ccdab54d ! 4: f80fa00538 fsck: add a unified interface for reporting fsck messages
@@ Commit message
"fsck_vreport". Instead of using "...", provide "va_list" to allow more
flexibility.
- Instead of changing "report" prototype to be algin with the
+ Instead of changing "report" prototype to be align with the
"fsck_vreport" function, we leave the "report" prototype unchanged due
to the reason that there are nearly 62 references about "report"
function. Simply change "report" function to use "fsck_vreport" to
5: b5607ac61c ! 5: 8c0376cfd5 fsck: add refs report function
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+ const char *message)
+{
+ struct fsck_ref_report *report = fsck_report;
-+
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
@@ fsck.c: int fsck_objects_error_function(struct fsck_options *o,
+
+ strbuf_release(&sb);
+ return ret;
-+
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
6: 5819406b40 = 6: 552fe62ff4 refs: set up ref consistency check infrastructure
7: 6a9c194a05 = 7: 3b357fa89e builtin/refs: add verify subcommand
8: 76406b1303 ! 8: 8d2c7b3aa5 files-backend: add unified interface for refs scanning
@@ refs/files-backend.c: static int files_ref_store_remove_on_disk(struct ref_store
+ if (o->verbose)
+ fprintf_ln(stderr, _("Checking references consistency"));
+ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
-+
+}
+
static int files_fsck(struct ref_store *ref_store,
9: ed02380516 = 9: b39533ae56 fsck: add ref name check for files backend
--
2.46.0
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 1/9] fsck: rename "skiplist" to "skip_oids"
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
@ 2024-08-08 11:24 ` shejialuo
2024-08-08 11:24 ` [GSoC][PATCH v16 2/9] fsck: rename objects-related fsck error functions shejialuo
` (8 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:24 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "skiplist" field in "fsck_options" is related to objects. Because we
are going to introduce a ref consistency check, the "skiplist" name is too
general, which will make the caller think "skiplist" is related to both
the refs and objects.
It may seem that for both refs and objects, we should provide a general
"skiplist" here. However, the type for "skiplist" is `struct oidset`,
which is totally unsuitable for refs.
To avoid the above ambiguity, rename "skiplist" to "skip_oids".
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 4 ++--
fsck.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fsck.c b/fsck.c
index eea7145470..3f32441492 100644
--- a/fsck.c
+++ b/fsck.c
@@ -205,7 +205,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
if (!strcmp(buf, "skiplist")) {
if (equal == len)
die("skiplist requires a path");
- oidset_parse_file(&options->skiplist, buf + equal + 1,
+ oidset_parse_file(&options->skip_oids, buf + equal + 1,
the_repository->hash_algo);
buf += len + 1;
continue;
@@ -223,7 +223,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
- return opts && oid && oidset_contains(&opts->skiplist, oid);
+ return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
__attribute__((format (printf, 5, 6)))
diff --git a/fsck.h b/fsck.h
index 6085a384f6..bcfb2e34cd 100644
--- a/fsck.h
+++ b/fsck.h
@@ -136,7 +136,7 @@ struct fsck_options {
fsck_error error_func;
unsigned strict:1;
enum fsck_msg_type *msg_type;
- struct oidset skiplist;
+ struct oidset skip_oids;
struct oidset gitmodules_found;
struct oidset gitmodules_done;
struct oidset gitattributes_found;
@@ -145,7 +145,7 @@ struct fsck_options {
};
#define FSCK_OPTIONS_DEFAULT { \
- .skiplist = OIDSET_INIT, \
+ .skip_oids = OIDSET_INIT, \
.gitmodules_found = OIDSET_INIT, \
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 2/9] fsck: rename objects-related fsck error functions
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
2024-08-08 11:24 ` [GSoC][PATCH v16 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
@ 2024-08-08 11:24 ` shejialuo
2024-08-08 11:26 ` [GSoC][PATCH v16 3/9] fsck: make "fsck_error" callback generic shejialuo
` (7 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:24 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The names of objects-related fsck error functions are generic. It's OK
when there is only an object database check. However, we are going to
introduce a refs database check report function. To avoid ambiguity,
rename objects-related fsck error functions to explicitly indicate these
functions are used to report objects-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 14 +++++++-------
fsck.c | 17 +++++++++--------
fsck.h | 26 +++++++++++++-------------
3 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index d13a226c2e..6d86bbe1e9 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -89,12 +89,12 @@ static int objerror(struct object *obj, const char *err)
return -1;
}
-static int fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+static int fsck_objects_error_func(struct fsck_options *o UNUSED,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
switch (msg_type) {
case FSCK_WARN:
@@ -938,7 +938,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
- fsck_obj_options.error_func = fsck_error_func;
+ fsck_obj_options.error_func = fsck_objects_error_func;
if (check_strict)
fsck_obj_options.strict = 1;
diff --git a/fsck.c b/fsck.c
index 3f32441492..8347842cfb 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1200,7 +1200,7 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
type);
}
-int fsck_error_function(struct fsck_options *o,
+int fsck_objects_error_function(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type UNUSED,
enum fsck_msg_type msg_type,
@@ -1303,16 +1303,17 @@ int git_fsck_config(const char *var, const char *value,
* Custom error callbacks that are used in more than one place.
*/
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message)
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
puts(oid_to_hex(oid));
return 0;
}
- return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+ return fsck_objects_error_function(o, oid, object_type,
+ msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index bcfb2e34cd..41ebebbb59 100644
--- a/fsck.h
+++ b/fsck.h
@@ -120,16 +120,16 @@ typedef int (*fsck_error)(struct fsck_options *o,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
-int fsck_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
- const char *message);
-int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id,
- const char *message);
+int fsck_objects_error_function(struct fsck_options *o,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
+ const char *message);
+int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
struct fsck_options {
fsck_walk_func walk;
@@ -150,7 +150,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function \
+ .error_func = fsck_objects_error_function \
}
#define FSCK_OPTIONS_STRICT { \
.strict = 1, \
@@ -158,7 +158,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_function, \
+ .error_func = fsck_objects_error_function, \
}
#define FSCK_OPTIONS_MISSING_GITMODULES { \
.strict = 1, \
@@ -166,7 +166,7 @@ struct fsck_options {
.gitmodules_done = OIDSET_INIT, \
.gitattributes_found = OIDSET_INIT, \
.gitattributes_done = OIDSET_INIT, \
- .error_func = fsck_error_cb_print_missing_gitmodules, \
+ .error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
/* descend in all linked child objects
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 3/9] fsck: make "fsck_error" callback generic
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
2024-08-08 11:24 ` [GSoC][PATCH v16 1/9] fsck: rename "skiplist" to "skip_oids" shejialuo
2024-08-08 11:24 ` [GSoC][PATCH v16 2/9] fsck: rename objects-related fsck error functions shejialuo
@ 2024-08-08 11:26 ` shejialuo
2024-08-08 11:26 ` [GSoC][PATCH v16 4/9] fsck: add a unified interface for reporting fsck messages shejialuo
` (6 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:26 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "fsck_error" callback is designed to report object-related
error messages. It accepts two parameters, "oid" and "object_type", which
are not generic. In order to provide a unified callback which can report
either objects or refs, remove the objects-related parameters and add
the generic parameter "void *fsck_report".
Create a new "fsck_object_report" structure which incorporates the
removed parameters "oid" and "object_type". Then change the
corresponding references to adapt to new "fsck_error" callback.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
builtin/fsck.c | 7 +++++--
builtin/mktag.c | 3 +--
fsck.c | 26 ++++++++++++++++----------
fsck.h | 17 ++++++++++++-----
object-file.c | 9 ++++-----
5 files changed, 38 insertions(+), 24 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 6d86bbe1e9..766bbd014d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -90,12 +90,15 @@ static int objerror(struct object *obj, const char *err)
}
static int fsck_objects_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
{
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
+ enum object_type object_type = report->object_type;
+
switch (msg_type) {
case FSCK_WARN:
/* TRANSLATORS: e.g. warning in tree 01bfda: <more explanation> */
diff --git a/builtin/mktag.c b/builtin/mktag.c
index 4767f1a97e..c6b644219f 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -18,8 +18,7 @@ static int option_strict = 1;
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int mktag_fsck_error_func(struct fsck_options *o UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
+ void *fsck_report UNUSED,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id UNUSED,
const char *message)
diff --git a/fsck.c b/fsck.c
index 8347842cfb..cca6ae144f 100644
--- a/fsck.c
+++ b/fsck.c
@@ -232,6 +232,10 @@ static int report(struct fsck_options *options,
enum fsck_msg_id msg_id, const char *fmt, ...)
{
va_list ap;
+ struct fsck_object_report report = {
+ .oid = oid,
+ .object_type = object_type
+ };
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -252,7 +256,7 @@ static int report(struct fsck_options *options,
va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, oid, object_type,
+ result = options->error_func(options, &report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -1201,12 +1205,14 @@ int fsck_buffer(const struct object_id *oid, enum object_type type,
}
int fsck_objects_error_function(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
+ struct fsck_object_report *report = fsck_report;
+ const struct object_id *oid = report->oid;
+
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
return 0;
@@ -1304,16 +1310,16 @@ int git_fsck_config(const char *var, const char *value,
*/
int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
{
if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
- puts(oid_to_hex(oid));
+ struct fsck_object_report *report = fsck_report;
+ puts(oid_to_hex(report->oid));
return 0;
}
- return fsck_objects_error_function(o, oid, object_type,
+ return fsck_objects_error_function(o, fsck_report,
msg_type, msg_id, message);
}
diff --git a/fsck.h b/fsck.h
index 41ebebbb59..3b80d02506 100644
--- a/fsck.h
+++ b/fsck.h
@@ -114,23 +114,30 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type);
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
void *data, struct fsck_options *options);
-/* callback for fsck_object, type is FSCK_ERROR or FSCK_WARN */
+/*
+ * Callback for reporting errors either for objects or refs. The "fsck_report"
+ * is a generic pointer that can be used to pass any information.
+ */
typedef int (*fsck_error)(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_objects_error_function(struct fsck_options *o,
- const struct object_id *oid, enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
const char *message);
int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
- const struct object_id *oid,
- enum object_type object_type,
+ void *fsck_report,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message);
+struct fsck_object_report {
+ const struct object_id *oid;
+ enum object_type object_type;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
diff --git a/object-file.c b/object-file.c
index 065103be3e..05ac6ebed6 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2470,11 +2470,10 @@ int repo_has_object_file(struct repository *r,
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts UNUSED,
- const struct object_id *oid UNUSED,
- enum object_type object_type UNUSED,
- enum fsck_msg_type msg_type UNUSED,
- enum fsck_msg_id msg_id UNUSED,
- const char *message)
+ void *fsck_report UNUSED,
+ enum fsck_msg_type msg_type UNUSED,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 4/9] fsck: add a unified interface for reporting fsck messages
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (2 preceding siblings ...)
2024-08-08 11:26 ` [GSoC][PATCH v16 3/9] fsck: make "fsck_error" callback generic shejialuo
@ 2024-08-08 11:26 ` shejialuo
2024-08-08 11:27 ` [GSoC][PATCH v16 5/9] fsck: add refs report function shejialuo
` (5 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:26 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The static function "report" provided by "fsck.c" aims at checking error
type and calling the callback "error_func" to report the message. Both
refs and objects need to check the error type of the current fsck
message. In order to extract this common behavior, create a new function
"fsck_vreport". Instead of using "...", provide "va_list" to allow more
flexibility.
Instead of changing the "report" prototype to align with the
"fsck_vreport" function, we leave the "report" prototype unchanged
because there are nearly 62 references to the "report"
function. Simply change the "report" function to use "fsck_vreport" to
report object-related messages.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 44 ++++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/fsck.c b/fsck.c
index cca6ae144f..3614aa56a3 100644
--- a/fsck.c
+++ b/fsck.c
@@ -226,16 +226,15 @@ static int object_on_skiplist(struct fsck_options *opts,
return opts && oid && oidset_contains(&opts->skip_oids, oid);
}
-__attribute__((format (printf, 5, 6)))
-static int report(struct fsck_options *options,
- const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id msg_id, const char *fmt, ...)
+/*
+ * Provide the common functionality for either fscking refs or objects.
+ * It will get the current msg error type and call the error_func callback
+ * which is registered in the "fsck_options" struct.
+ */
+static int fsck_vreport(struct fsck_options *options,
+ void *fsck_report,
+ enum fsck_msg_id msg_id, const char *fmt, va_list ap)
{
- va_list ap;
- struct fsck_object_report report = {
- .oid = oid,
- .object_type = object_type
- };
struct strbuf sb = STRBUF_INIT;
enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
int result;
@@ -243,9 +242,6 @@ static int report(struct fsck_options *options,
if (msg_type == FSCK_IGNORE)
return 0;
- if (object_on_skiplist(options, oid))
- return 0;
-
if (msg_type == FSCK_FATAL)
msg_type = FSCK_ERROR;
else if (msg_type == FSCK_INFO)
@@ -254,11 +250,31 @@ static int report(struct fsck_options *options,
prepare_msg_ids();
strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
- va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
- result = options->error_func(options, &report,
+ result = options->error_func(options, fsck_report,
msg_type, msg_id, sb.buf);
strbuf_release(&sb);
+
+ return result;
+}
+
+__attribute__((format (printf, 5, 6)))
+static int report(struct fsck_options *options,
+ const struct object_id *oid, enum object_type object_type,
+ enum fsck_msg_id msg_id, const char *fmt, ...)
+{
+ va_list ap;
+ struct fsck_object_report report = {
+ .oid = oid,
+ .object_type = object_type
+ };
+ int result;
+
+ if (object_on_skiplist(options, oid))
+ return 0;
+
+ va_start(ap, fmt);
+ result = fsck_vreport(options, &report, msg_id, fmt, ap);
va_end(ap);
return result;
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 5/9] fsck: add refs report function
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (3 preceding siblings ...)
2024-08-08 11:26 ` [GSoC][PATCH v16 4/9] fsck: add a unified interface for reporting fsck messages shejialuo
@ 2024-08-08 11:27 ` shejialuo
2024-08-08 11:27 ` [GSoC][PATCH v16 6/9] refs: set up ref consistency check infrastructure shejialuo
` (4 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new struct "fsck_ref_report" to contain the information we
need when reporting refs-related messages.
With the new "fsck_vreport" function, add a new function
"fsck_report_ref" to report refs-related fsck error messages. Unlike the
"report" function, which takes each field as an explicit parameter, we
simply pass "struct fsck_ref_report *report" as the parameter. This is
because at present we don't know exactly how many fields we need. By
passing this parameter,
we don't need to change this function prototype when we want to add more
information into "fsck_ref_report".
We have introduced "fsck_report_ref" function to report the error
message for refs. We still need to add the corresponding callback
function. Create refs-specific "error_func" callback
"fsck_refs_error_function".
Last, add "FSCK_REFS_OPTIONS_DEFAULT" macro to create default options
when checking ref consistency.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
fsck.c | 39 +++++++++++++++++++++++++++++++++++++++
fsck.h | 25 +++++++++++++++++++++++++
2 files changed, 64 insertions(+)
diff --git a/fsck.c b/fsck.c
index 3614aa56a3..e16c892f6a 100644
--- a/fsck.c
+++ b/fsck.c
@@ -280,6 +280,19 @@ static int report(struct fsck_options *options,
return result;
}
+int fsck_report_ref(struct fsck_options *options,
+ struct fsck_ref_report *report,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int result;
+ va_start(ap, fmt);
+ result = fsck_vreport(options, report, msg_id, fmt, ap);
+ va_end(ap);
+ return result;
+}
+
void fsck_enable_object_names(struct fsck_options *options)
{
if (!options->object_names)
@@ -1237,6 +1250,32 @@ int fsck_objects_error_function(struct fsck_options *o,
return 1;
}
+int fsck_refs_error_function(struct fsck_options *options UNUSED,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
+{
+ struct fsck_ref_report *report = fsck_report;
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ strbuf_addstr(&sb, report->path);
+
+ if (report->oid)
+ strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid));
+ else if (report->referent)
+ strbuf_addf(&sb, " -> (%s)", report->referent);
+
+ if (msg_type == FSCK_WARN)
+ warning("%s: %s", sb.buf, message);
+ else
+ ret = error("%s: %s", sb.buf, message);
+
+ strbuf_release(&sb);
+ return ret;
+}
+
static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
struct fsck_options *options, const char *blob_type)
diff --git a/fsck.h b/fsck.h
index 3b80d02506..2002590f60 100644
--- a/fsck.h
+++ b/fsck.h
@@ -133,11 +133,23 @@ int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,
enum fsck_msg_id msg_id,
const char *message);
+int fsck_refs_error_function(struct fsck_options *options,
+ void *fsck_report,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message);
+
struct fsck_object_report {
const struct object_id *oid;
enum object_type object_type;
};
+struct fsck_ref_report {
+ const char *path;
+ const struct object_id *oid;
+ const char *referent;
+};
+
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
@@ -175,6 +187,9 @@ struct fsck_options {
.gitattributes_done = OIDSET_INIT, \
.error_func = fsck_objects_error_cb_print_missing_gitmodules, \
}
+#define FSCK_REFS_OPTIONS_DEFAULT { \
+ .error_func = fsck_refs_error_function, \
+}
/* descend in all linked child objects
* the return value is:
@@ -216,6 +231,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Report an error or warning for refs.
+ */
+__attribute__((format (printf, 4, 5)))
+int fsck_report_ref(struct fsck_options *options,
+ struct fsck_ref_report *report,
+ enum fsck_msg_id msg_id,
+ const char *fmt, ...);
+
+
/*
* Subsystem for storing human-readable names for each object.
*
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 6/9] refs: set up ref consistency check infrastructure
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (4 preceding siblings ...)
2024-08-08 11:27 ` [GSoC][PATCH v16 5/9] fsck: add refs report function shejialuo
@ 2024-08-08 11:27 ` shejialuo
2024-08-08 11:27 ` [GSoC][PATCH v16 7/9] builtin/refs: add verify subcommand shejialuo
` (3 subsequent siblings)
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
The "struct ref_store" is the base class which contains the "be" pointer
which provides backend-specific functions whose interfaces are defined
in the "ref_storage_be". We could reuse this polymorphism to define only
one interface. For every backend, we need to provide its own function
pointer.
The interfaces defined in `ref_storage_be` are carefully grouped by
their semantics. They are organized into five parts:
1. The name and the initialization interfaces.
2. The ref transaction interfaces.
3. The ref internal interfaces (pack, rename and copy).
4. The ref filesystem interfaces.
5. The reflog related interfaces.
To stay consistent with git-fsck(1), add a new interface named
"fsck_fn" to the end of "ref_storage_be". It does not fit into any of
the five categories above, so explicitly add a blank line to
set it apart from the others.
Last, implement placeholder functions for each ref backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
refs.c | 5 +++++
refs.h | 8 ++++++++
refs/debug.c | 11 +++++++++++
refs/files-backend.c | 13 ++++++++++++-
refs/packed-backend.c | 8 ++++++++
refs/refs-internal.h | 6 ++++++
refs/reftable-backend.c | 8 ++++++++
7 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index 915aeb4d1d..6f642dc681 100644
--- a/refs.c
+++ b/refs.c
@@ -318,6 +318,11 @@ int check_refname_format(const char *refname, int flags)
return check_or_sanitize_refname(refname, flags, NULL);
}
+int refs_fsck(struct ref_store *refs, struct fsck_options *o)
+{
+ return refs->be->fsck(refs, o);
+}
+
void sanitize_refname_component(const char *refname, struct strbuf *out)
{
if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out))
diff --git a/refs.h b/refs.h
index b3e39bc257..405073621a 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@
#include "commit.h"
#include "repository.h"
+struct fsck_options;
struct object_id;
struct ref_store;
struct strbuf;
@@ -541,6 +542,13 @@ int refs_for_each_reflog(struct ref_store *refs, each_reflog_fn fn, void *cb_dat
*/
int check_refname_format(const char *refname, int flags);
+/*
+ * Check the reference database for consistency. Return 0 if refs and
+ * reflogs are consistent, and non-zero otherwise. The errors will be
+ * written to stderr.
+ */
+int refs_fsck(struct ref_store *refs, struct fsck_options *o);
+
/*
* Apply the rules from check_refname_format, but mutate the result until it
* is acceptable, and place the result in "out".
diff --git a/refs/debug.c b/refs/debug.c
index 547d9245b9..45e2e784a0 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -419,6 +419,15 @@ static int debug_reflog_expire(struct ref_store *ref_store, const char *refname,
return res;
}
+static int debug_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
+ int res = drefs->refs->be->fsck(drefs->refs, o);
+ trace_printf_key(&trace_refs, "fsck: %d\n", res);
+ return res;
+}
+
struct ref_storage_be refs_be_debug = {
.name = "debug",
.init = NULL,
@@ -451,4 +460,6 @@ struct ref_storage_be refs_be_debug = {
.create_reflog = debug_create_reflog,
.delete_reflog = debug_delete_reflog,
.reflog_expire = debug_reflog_expire,
+
+ .fsck = debug_fsck,
};
diff --git a/refs/files-backend.c b/refs/files-backend.c
index aa52d9be7c..4630eb1f80 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3408,6 +3408,15 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+static int files_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_READ, "fsck");
+
+ return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+}
+
struct ref_storage_be refs_be_files = {
.name = "files",
.init = files_ref_store_init,
@@ -3434,5 +3443,7 @@ struct ref_storage_be refs_be_files = {
.reflog_exists = files_reflog_exists,
.create_reflog = files_create_reflog,
.delete_reflog = files_delete_reflog,
- .reflog_expire = files_reflog_expire
+ .reflog_expire = files_reflog_expire,
+
+ .fsck = files_fsck,
};
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a0666407cd..5209b0b212 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1735,6 +1735,12 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s
return empty_ref_iterator_begin();
}
+static int packed_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_packed = {
.name = "packed",
.init = packed_ref_store_init,
@@ -1762,4 +1768,6 @@ struct ref_storage_be refs_be_packed = {
.create_reflog = NULL,
.delete_reflog = NULL,
.reflog_expire = NULL,
+
+ .fsck = packed_fsck,
};
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index fa975d69aa..a905e187cd 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -4,6 +4,7 @@
#include "refs.h"
#include "iterator.h"
+struct fsck_options;
struct ref_transaction;
/*
@@ -650,6 +651,9 @@ typedef int read_raw_ref_fn(struct ref_store *ref_store, const char *refname,
typedef int read_symbolic_ref_fn(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
+typedef int fsck_fn(struct ref_store *ref_store,
+ struct fsck_options *o);
+
struct ref_storage_be {
const char *name;
ref_store_init_fn *init;
@@ -677,6 +681,8 @@ struct ref_storage_be {
create_reflog_fn *create_reflog;
delete_reflog_fn *delete_reflog;
reflog_expire_fn *reflog_expire;
+
+ fsck_fn *fsck;
};
extern struct ref_storage_be refs_be_files;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index fbe74c239d..b5a1a526df 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2303,6 +2303,12 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
return ret;
}
+static int reftable_be_fsck(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ return 0;
+}
+
struct ref_storage_be refs_be_reftable = {
.name = "reftable",
.init = reftable_be_init,
@@ -2330,4 +2336,6 @@ struct ref_storage_be refs_be_reftable = {
.create_reflog = reftable_be_create_reflog,
.delete_reflog = reftable_be_delete_reflog,
.reflog_expire = reftable_be_reflog_expire,
+
+ .fsck = reftable_be_fsck,
};
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 7/9] builtin/refs: add verify subcommand
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (5 preceding siblings ...)
2024-08-08 11:27 ` [GSoC][PATCH v16 6/9] refs: set up ref consistency check infrastructure shejialuo
@ 2024-08-08 11:27 ` shejialuo
2024-10-04 19:14 ` [PATCH 1/2] refs.c: remove redundant translation markers Teng Long
2024-08-08 11:31 ` [GSoC][PATCH v16 8/9] files-backend: add unified interface for refs scanning shejialuo
` (2 subsequent siblings)
9 siblings, 1 reply; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:27 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
Introduce a new subcommand "verify" in git-refs(1) to allow the user to
check the reference database consistency. This subcommand will also
be used as the entry point for checking refs from "git-fsck(1)".
Add "verbose" field into "fsck_options" to indicate whether we should
print verbose messages when checking refs and objects consistency.
Remove the bit-field from the "strict" field, because we cannot take
the address of a bit-field, which makes it awkward to set the member
when parsing the command line options.
The "git-fsck(1)" declares its "fsck_options" variables with the "static"
storage class, which avoids complaints from the leak-checker. However, in
"git refs verify", we need to free the memory manually. Thus add the
"fsck_options_clear" function in "fsck.c" to perform this
cleanup.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
Documentation/git-refs.txt | 13 +++++++++++++
builtin/refs.c | 34 ++++++++++++++++++++++++++++++++++
fsck.c | 11 +++++++++++
fsck.h | 8 +++++++-
4 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/Documentation/git-refs.txt b/Documentation/git-refs.txt
index 5b99e04385..ce31f93061 100644
--- a/Documentation/git-refs.txt
+++ b/Documentation/git-refs.txt
@@ -10,6 +10,7 @@ SYNOPSIS
--------
[verse]
'git refs migrate' --ref-format=<format> [--dry-run]
+'git refs verify' [--strict] [--verbose]
DESCRIPTION
-----------
@@ -22,6 +23,9 @@ COMMANDS
migrate::
Migrate ref store between different formats.
+verify::
+ Verify reference database consistency.
+
OPTIONS
-------
@@ -39,6 +43,15 @@ include::ref-storage-format.txt[]
can be used to double check that the migration works as expected before
performing the actual migration.
+The following options are specific to 'git refs verify':
+
+--strict::
+ Enable stricter error checking. This will cause warnings to be
+ reported as errors. See linkgit:git-fsck[1].
+
+--verbose::
+ When verifying the reference database consistency, be chatty.
+
KNOWN LIMITATIONS
-----------------
diff --git a/builtin/refs.c b/builtin/refs.c
index 46dcd150d4..131f98be98 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -1,4 +1,6 @@
#include "builtin.h"
+#include "config.h"
+#include "fsck.h"
#include "parse-options.h"
#include "refs.h"
#include "repository.h"
@@ -7,6 +9,9 @@
#define REFS_MIGRATE_USAGE \
N_("git refs migrate --ref-format=<format> [--dry-run]")
+#define REFS_VERIFY_USAGE \
+ N_("git refs verify [--strict] [--verbose]")
+
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
const char * const migrate_usage[] = {
@@ -58,15 +63,44 @@ static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
return err;
}
+static int cmd_refs_verify(int argc, const char **argv, const char *prefix)
+{
+ struct fsck_options fsck_refs_options = FSCK_REFS_OPTIONS_DEFAULT;
+ const char * const verify_usage[] = {
+ REFS_VERIFY_USAGE,
+ NULL,
+ };
+ struct option options[] = {
+ OPT_BOOL(0, "verbose", &fsck_refs_options.verbose, N_("be verbose")),
+ OPT_BOOL(0, "strict", &fsck_refs_options.strict, N_("enable strict checking")),
+ OPT_END(),
+ };
+ int ret;
+
+ argc = parse_options(argc, argv, prefix, options, verify_usage, 0);
+ if (argc)
+ usage(_("'git refs verify' takes no arguments"));
+
+ git_config(git_fsck_config, &fsck_refs_options);
+ prepare_repo_settings(the_repository);
+
+ ret = refs_fsck(get_main_ref_store(the_repository), &fsck_refs_options);
+
+ fsck_options_clear(&fsck_refs_options);
+ return ret;
+}
+
int cmd_refs(int argc, const char **argv, const char *prefix)
{
const char * const refs_usage[] = {
REFS_MIGRATE_USAGE,
+ REFS_VERIFY_USAGE,
NULL,
};
parse_opt_subcommand_fn *fn = NULL;
struct option opts[] = {
OPT_SUBCOMMAND("migrate", &fn, cmd_refs_migrate),
+ OPT_SUBCOMMAND("verify", &fn, cmd_refs_verify),
OPT_END(),
};
diff --git a/fsck.c b/fsck.c
index e16c892f6a..3756f52459 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1331,6 +1331,17 @@ int fsck_finish(struct fsck_options *options)
return ret;
}
+void fsck_options_clear(struct fsck_options *options)
+{
+ free(options->msg_type);
+ oidset_clear(&options->skip_oids);
+ oidset_clear(&options->gitmodules_found);
+ oidset_clear(&options->gitmodules_done);
+ oidset_clear(&options->gitattributes_found);
+ oidset_clear(&options->gitattributes_done);
+ kh_clear_oid_map(options->object_names);
+}
+
int git_fsck_config(const char *var, const char *value,
const struct config_context *ctx, void *cb)
{
diff --git a/fsck.h b/fsck.h
index 2002590f60..d551a9fe86 100644
--- a/fsck.h
+++ b/fsck.h
@@ -153,7 +153,8 @@ struct fsck_ref_report {
struct fsck_options {
fsck_walk_func walk;
fsck_error error_func;
- unsigned strict:1;
+ unsigned strict;
+ unsigned verbose;
enum fsck_msg_type *msg_type;
struct oidset skip_oids;
struct oidset gitmodules_found;
@@ -231,6 +232,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
*/
int fsck_finish(struct fsck_options *options);
+/*
+ * Clear the fsck_options struct, freeing any allocated memory.
+ */
+void fsck_options_clear(struct fsck_options *options);
+
/*
* Report an error or warning for refs.
*/
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [PATCH 1/2] refs.c: remove redundant translation markers
2024-08-08 11:27 ` [GSoC][PATCH v16 7/9] builtin/refs: add verify subcommand shejialuo
@ 2024-10-04 19:14 ` Teng Long
2024-10-06 10:01 ` shejialuo
0 siblings, 1 reply; 282+ messages in thread
From: Teng Long @ 2024-10-04 19:14 UTC (permalink / raw)
To: shejialuo; +Cc: git, gitster, jltobler, karthik.188, ps, sunshine, Teng Long
From: Teng Long <dyroneteng@gmail.com>
shejialuo <shejialuo@gmail.com> wrote:
>> --- a/builtin/refs.c
>> +++ b/builtin/refs.c
>> @@ -1,4 +1,6 @@
>> #include "builtin.h"
>> +#include "config.h"
>> +#include "fsck.h"
>> #include "parse-options.h"
>> #include "refs.h"
>> #include "repository.h"
>> @@ -7,6 +9,9 @@
>> #define REFS_MIGRATE_USAGE \
>> N_("git refs migrate --ref-format=<format> [--dry-run]")
>>
>> +#define REFS_VERIFY_USAGE \
>> + N_("git refs verify [--strict] [--verbose]")
This define may not require translation, then maybe it should
look like:
Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
builtin/refs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/builtin/refs.c b/builtin/refs.c
index 24978a7b..3a799aa6 100644
--- a/builtin/refs.c
+++ b/builtin/refs.c
@@ -10,7 +10,7 @@
N_("git refs migrate --ref-format=<format> [--dry-run]")
#define REFS_VERIFY_USAGE \
- N_("git refs verify [--strict] [--verbose]")
+ "git refs verify [--strict] [--verbose]"
static int cmd_refs_migrate(int argc, const char **argv, const char *prefix)
{
--
2.43.0.rc1.4.g79da3975.dirty
Thanks.
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [PATCH 1/2] refs.c: remove redundant translation markers
2024-10-04 19:14 ` [PATCH 1/2] refs.c: remove redundant translation markers Teng Long
@ 2024-10-06 10:01 ` shejialuo
0 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-10-06 10:01 UTC (permalink / raw)
To: Teng Long; +Cc: git, gitster, jltobler, karthik.188, ps, sunshine
On Sat, Oct 05, 2024 at 03:14:33AM +0800, Teng Long wrote:
> From: Teng Long <dyroneteng@gmail.com>
[snip]
> >> @@ -7,6 +9,9 @@
> >> #define REFS_MIGRATE_USAGE \
> >> N_("git refs migrate --ref-format=<format> [--dry-run]")
> >>
> >> +#define REFS_VERIFY_USAGE \
> >> + N_("git refs verify [--strict] [--verbose]")
>
> This define may not require translation, then maybe it should
> look like:
>
> Signed-off-by: Teng Long <dyroneteng@gmail.com>
> ---
> builtin/refs.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/builtin/refs.c b/builtin/refs.c
> index 24978a7b..3a799aa6 100644
> --- a/builtin/refs.c
> +++ b/builtin/refs.c
> @@ -10,7 +10,7 @@
> N_("git refs migrate --ref-format=<format> [--dry-run]")
>
> #define REFS_VERIFY_USAGE \
> - N_("git refs verify [--strict] [--verbose]")
> + "git refs verify [--strict] [--verbose]"
I am OK with this change. Actually, I just followed what Patrick did
for the "git refs migrate" subcommand. So at first glance, I wondered
why the usage string of the "git refs migrate" subcommand should not be
changed as well.
After searching the mailing list for information, I found that Junio has
explained why in
https://lore.kernel.org/git/xmqqbk5y3j8a.fsf@gitster.g/
Because the `--ref-format=<format>` usage string contains a placeholder,
keeping it translatable is much more helpful for other locales.
Thanks,
Jialuo
^ permalink raw reply [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 8/9] files-backend: add unified interface for refs scanning
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (6 preceding siblings ...)
2024-08-08 11:27 ` [GSoC][PATCH v16 7/9] builtin/refs: add verify subcommand shejialuo
@ 2024-08-08 11:31 ` shejialuo
2024-08-08 11:31 ` [GSoC][PATCH v16 9/9] fsck: add ref name check for files backend shejialuo
2024-08-08 11:54 ` [GSoC][PATCH v16 0/9] ref consistency check infra setup Patrick Steinhardt
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
For refs and reflogs, we need to scan their corresponding directories to
check every regular file or symbolic link which shares the same pattern.
Introduce a unified interface for scanning directories for
files-backend.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
| 3 ++
| 1 +
| 73 ++++++++++++++++++++++++++++++++++-
3 files changed, 76 insertions(+), 1 deletion(-)
--git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index f643585a34..7c809fddf1 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -19,6 +19,9 @@
`badParentSha1`::
(ERROR) A commit object has a bad parent sha1.
+`badRefFiletype`::
+ (ERROR) A ref has a bad file type.
+
`badTagName`::
(INFO) A tag has an invalid format.
--git a/fsck.h b/fsck.h
index d551a9fe86..af02174973 100644
--- a/fsck.h
+++ b/fsck.h
@@ -31,6 +31,7 @@ enum fsck_msg_type {
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
+ FUNC(BAD_REF_FILETYPE, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
--git a/refs/files-backend.c b/refs/files-backend.c
index 4630eb1f80..e511e1dcce 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -6,6 +6,7 @@
#include "../gettext.h"
#include "../hash.h"
#include "../hex.h"
+#include "../fsck.h"
#include "../refs.h"
#include "refs-internal.h"
#include "ref-cache.h"
@@ -3408,13 +3409,83 @@ static int files_ref_store_remove_on_disk(struct ref_store *ref_store,
return ret;
}
+/*
+ * For refs and reflogs, they share a unified interface when scanning
+ * the whole directory. This function is used as the callback for each
+ * regular file or symlink in the directory.
+ */
+typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ struct dir_iterator *iter);
+
+static int files_fsck_refs_dir(struct ref_store *ref_store,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ files_fsck_refs_fn *fsck_refs_fn)
+{
+ struct strbuf sb = STRBUF_INIT;
+ struct dir_iterator *iter;
+ int iter_status;
+ int ret = 0;
+
+ strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir);
+
+ iter = dir_iterator_begin(sb.buf, 0);
+ if (!iter) {
+ ret = error_errno(_("cannot open directory %s"), sb.buf);
+ goto out;
+ }
+
+ while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+ if (S_ISDIR(iter->st.st_mode)) {
+ continue;
+ } else if (S_ISREG(iter->st.st_mode) ||
+ S_ISLNK(iter->st.st_mode)) {
+ if (o->verbose)
+ fprintf_ln(stderr, "Checking %s/%s",
+ refs_check_dir, iter->relative_path);
+ for (size_t i = 0; fsck_refs_fn[i]; i++) {
+ if (fsck_refs_fn[i](ref_store, o, refs_check_dir, iter))
+ ret = -1;
+ }
+ } else {
+ struct fsck_ref_report report = { .path = iter->basename };
+ if (fsck_report_ref(o, &report,
+ FSCK_MSG_BAD_REF_FILETYPE,
+ "unexpected file type"))
+ ret = -1;
+ }
+ }
+
+ if (iter_status != ITER_DONE)
+ ret = error(_("failed to iterate over '%s'"), sb.buf);
+
+out:
+ strbuf_release(&sb);
+ return ret;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+ struct fsck_options *o)
+{
+ files_fsck_refs_fn fsck_refs_fn[]= {
+ NULL,
+ };
+
+ if (o->verbose)
+ fprintf_ln(stderr, _("Checking references consistency"));
+ return files_fsck_refs_dir(ref_store, o, "refs", fsck_refs_fn);
+}
+
static int files_fsck(struct ref_store *ref_store,
struct fsck_options *o)
{
struct files_ref_store *refs =
files_downcast(ref_store, REF_STORE_READ, "fsck");
- return refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
+ return files_fsck_refs(ref_store, o) |
+ refs->packed_ref_store->be->fsck(refs->packed_ref_store, o);
}
struct ref_storage_be refs_be_files = {
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* [GSoC][PATCH v16 9/9] fsck: add ref name check for files backend
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (7 preceding siblings ...)
2024-08-08 11:31 ` [GSoC][PATCH v16 8/9] files-backend: add unified interface for refs scanning shejialuo
@ 2024-08-08 11:31 ` shejialuo
2024-08-08 11:54 ` [GSoC][PATCH v16 0/9] ref consistency check infra setup Patrick Steinhardt
9 siblings, 0 replies; 282+ messages in thread
From: shejialuo @ 2024-08-08 11:31 UTC (permalink / raw)
To: git
Cc: Patrick Steinhardt, Karthik Nayak, Junio C Hamano, Eric Sunshine,
Justin Tobler
git-fsck(1) only implicitly checks references; it does not fully
check refs with badly formatted names such as a standalone "@".
However, a file ending with ".lock" should not be marked as having a bad
ref name. It is expected that concurrent writers may have such lock files.
We currently ignore this situation. However, a bare ".lock" file is
still reported as an error.
In order to provide such checks, add a new fsck message id "badRefName"
with default ERROR type. Use the existing "check_refname_format" to
explicitly check the ref name, and add a new test to verify the
functionality.
Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
---
| 3 ++
| 1 +
| 31 ++++++++++++
| 92 +++++++++++++++++++++++++++++++++++
4 files changed, 127 insertions(+)
create mode 100755 t/t0602-reffiles-fsck.sh
--git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt
index 7c809fddf1..68a2801f15 100644
--- a/Documentation/fsck-msgids.txt
+++ b/Documentation/fsck-msgids.txt
@@ -22,6 +22,9 @@
`badRefFiletype`::
(ERROR) A ref has a bad file type.
+`badRefName`::
+ (ERROR) A ref has an invalid format.
+
`badTagName`::
(INFO) A tag has an invalid format.
--git a/fsck.h b/fsck.h
index af02174973..500b4c04d2 100644
--- a/fsck.h
+++ b/fsck.h
@@ -32,6 +32,7 @@ enum fsck_msg_type {
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PARENT_SHA1, ERROR) \
FUNC(BAD_REF_FILETYPE, ERROR) \
+ FUNC(BAD_REF_NAME, ERROR) \
FUNC(BAD_TIMEZONE, ERROR) \
FUNC(BAD_TREE, ERROR) \
FUNC(BAD_TREE_SHA1, ERROR) \
--git a/refs/files-backend.c b/refs/files-backend.c
index e511e1dcce..7f6eefa960 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3419,6 +3419,36 @@ typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
const char *refs_check_dir,
struct dir_iterator *iter);
+static int files_fsck_refs_name(struct ref_store *ref_store UNUSED,
+ struct fsck_options *o,
+ const char *refs_check_dir,
+ struct dir_iterator *iter)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret = 0;
+
+ /*
+ * Ignore the files ending with ".lock" as they may be lock files
+ * However, do not allow bare ".lock" files.
+ */
+ if (iter->basename[0] != '.' && ends_with(iter->basename, ".lock"))
+ goto cleanup;
+
+ if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+ struct fsck_ref_report report = { .path = NULL };
+
+ strbuf_addf(&sb, "%s/%s", refs_check_dir, iter->relative_path);
+ report.path = sb.buf;
+ ret = fsck_report_ref(o, &report,
+ FSCK_MSG_BAD_REF_NAME,
+ "invalid refname format");
+ }
+
+cleanup:
+ strbuf_release(&sb);
+ return ret;
+}
+
static int files_fsck_refs_dir(struct ref_store *ref_store,
struct fsck_options *o,
const char *refs_check_dir,
@@ -3470,6 +3500,7 @@ static int files_fsck_refs(struct ref_store *ref_store,
struct fsck_options *o)
{
files_fsck_refs_fn fsck_refs_fn[]= {
+ files_fsck_refs_name,
NULL,
};
--git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..71a4d1a5ae
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+TEST_PASSES_SANITIZE_LEAK=true
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ cd repo &&
+
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+ git tag multi_hierarchy/tag-2 &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/heads/@: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/@ &&
+ test_cmp expect err &&
+
+ cp $tag_dir_prefix/multi_hierarchy/tag-2 $tag_dir_prefix/multi_hierarchy/@ &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/multi_hierarchy/@: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/multi_hierarchy/@ &&
+ test_cmp expect err &&
+
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+ git refs verify 2>err &&
+ rm $tag_dir_prefix/tag-1.lock &&
+ test_must_be_empty err &&
+
+ cp $tag_dir_prefix/tag-1 $tag_dir_prefix/.lock &&
+ test_must_fail git refs verify 2>err &&
+ cat >expect <<-EOF &&
+ error: refs/tags/.lock: badRefName: invalid refname format
+ EOF
+ rm $tag_dir_prefix/.lock &&
+ test_cmp expect err
+'
+
+test_expect_success 'ref name check should be adapted into fsck messages' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ branch_dir_prefix=.git/refs/heads &&
+ tag_dir_prefix=.git/refs/tags &&
+ cd repo &&
+ git commit --allow-empty -m initial &&
+ git checkout -b branch-1 &&
+ git tag tag-1 &&
+ git commit --allow-empty -m second &&
+ git checkout -b branch-2 &&
+ git tag tag-2 &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+ git -c fsck.badRefName=warn refs verify 2>err &&
+ cat >expect <<-EOF &&
+ warning: refs/heads/.branch-1: badRefName: invalid refname format
+ EOF
+ rm $branch_dir_prefix/.branch-1 &&
+ test_cmp expect err &&
+
+ cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+ git -c fsck.badRefName=ignore refs verify 2>err &&
+ test_must_be_empty err
+'
+
+test_done
--
2.46.0
^ permalink raw reply related [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v16 0/9] ref consistency check infra setup
2024-08-08 11:21 ` [GSoC][PATCH v16 " shejialuo
` (8 preceding siblings ...)
2024-08-08 11:31 ` [GSoC][PATCH v16 9/9] fsck: add ref name check for files backend shejialuo
@ 2024-08-08 11:54 ` Patrick Steinhardt
2024-08-08 16:37 ` Junio C Hamano
9 siblings, 1 reply; 282+ messages in thread
From: Patrick Steinhardt @ 2024-08-08 11:54 UTC (permalink / raw)
To: shejialuo
Cc: git, Karthik Nayak, Junio C Hamano, Eric Sunshine, Justin Tobler
[-- Attachment #1: Type: text/plain, Size: 472 bytes --]
On Thu, Aug 08, 2024 at 07:21:22PM +0800, shejialuo wrote:
> Hi All, this version handles some minor changes:
>
> 1. Remove redundant newlines.
> 2. Fix typo in commit message.
>
> At last, I wanna thank every reviewer. As we can see, this series starts
> from 5.27, it's a very long journey. I have learned a lot.
>
> Thanks
> Jialuo
The range diff looks as expected to me, so this series is ready
to be merged from my point of view. Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 282+ messages in thread
* Re: [GSoC][PATCH v16 0/9] ref consistency check infra setup
2024-08-08 11:54 ` [GSoC][PATCH v16 0/9] ref consistency check infra setup Patrick Steinhardt
@ 2024-08-08 16:37 ` Junio C Hamano
0 siblings, 0 replies; 282+ messages in thread
From: Junio C Hamano @ 2024-08-08 16:37 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: shejialuo, git, Karthik Nayak, Eric Sunshine, Justin Tobler
Patrick Steinhardt <ps@pks.im> writes:
> On Thu, Aug 08, 2024 at 07:21:22PM +0800, shejialuo wrote:
>> Hi All, this version handles some minor changes:
>>
>> 1. Remove redundant newlines.
>> 2. Fix typo in commit message.
>>
>> At last, I wanna thank every reviewer. As we can see, this series starts
>> from 5.27, it's a very long journey. I have learned a lot.
>>
>> Thanks
>> Jialuo
>
> The range diff looks as expected to me, so this series is ready
> to be merged from my point of view. Thanks!
>
> Patrick
Yeah, looking good here, too.
Will replace. Let me mark this topic for 'next'.
Thanks.
^ permalink raw reply [flat|nested] 282+ messages in thread