* [PATCH 01/14] odb: rename `FOR_EACH_OBJECT_*` flags
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 18:00 ` Justin Tobler
2026-01-15 11:04 ` [PATCH 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
` (16 subsequent siblings)
17 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This
prepares us for a new upcoming `odb_for_each_object()` function and
ensures that both the function and its flags have the same prefix.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 2 +-
builtin/pack-objects.c | 10 +++++-----
commit-graph.c | 4 ++--
object-file.c | 4 ++--
object-file.h | 2 +-
odb.h | 13 +++++++------
packfile.c | 20 ++++++++++----------
packfile.h | 4 ++--
reachable.c | 8 ++++----
repack-promisor.c | 2 +-
revision.c | 2 +-
11 files changed, 36 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 2ad712e9f8..6964a5a52c 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
batch_each_object(opt, batch_unordered_object,
- FOR_EACH_OBJECT_PACK_ORDER, &cb);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6ee31d48c9..74317051fd 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
strbuf_release(&buf);
@@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void)
if (for_each_packed_object(to_pack.repo,
add_object_in_unpacked_pack,
NULL,
- FOR_EACH_OBJECT_PACK_ORDER |
- FOR_EACH_OBJECT_LOCAL_ONLY |
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
die(_("cannot open pack index"));
}
diff --git a/commit-graph.c b/commit-graph.c
index 6b1f02e179..7f1145a082 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
goto cleanup;
}
for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
for_each_packed_object(ctx->r, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
diff --git a/object-file.c b/object-file.c
index e7e4c3348f..64e9e239dc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
struct odb_source *source;
@@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb,
if (r)
return r;
- if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
break;
}
diff --git a/object-file.h b/object-file.h
index 1229d5f675..42bb50e10c 100644
--- a/object-file.h
+++ b/object-file.h
@@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
*/
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
/**
diff --git a/odb.h b/odb.h
index bab07755f4..74503addf1 100644
--- a/odb.h
+++ b/odb.h
@@ -442,24 +442,25 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for for_each_*_object(). */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
enum {
diff --git a/packfile.c b/packfile.c
index 402c3b5dc7..b65f0b43f1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
uint32_t i;
int r = 0;
- if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) {
if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p,
* - in pack-order, it is pack position, which we must
* convert to an index position in order to get the oid.
*/
- if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER)
index_pos = pack_pos_to_index(p, i);
else
index_pos = i;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags)
+ void *data, enum odb_for_each_object_flags flags)
{
struct odb_source *source;
int r = 0;
@@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
struct packed_git *p = e->pack;
- if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
@@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (repo_has_promisor_remote(r)) {
for_each_packed_object(r, add_promisor_object,
&promisor_objects,
- FOR_EACH_OBJECT_PROMISOR_ONLY |
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/packfile.h b/packfile.h
index acc5c55ad5..15551258bd 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags);
+ void *data, enum odb_for_each_object_flags flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
diff --git a/reachable.c b/reachable.c
index 4b532039d5..82676b2668 100644
--- a/reachable.c
+++ b/reachable.c
@@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum for_each_object_flags flags;
+ enum odb_for_each_object_flags flags;
int r;
data.revs = revs;
@@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
data.extra_recent_oids_loaded = 0;
r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- FOR_EACH_OBJECT_LOCAL_ONLY);
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
if (r)
goto done;
- flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
+ flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
- flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
+ flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
diff --git a/repack-promisor.c b/repack-promisor.c
index ee6e0669f6..45c330b9a5 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo,
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
for_each_packed_object(repo, write_oid, &ctx,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index b65a763770..5aadf46dac 100644
--- a/revision.c
+++ b/revision.c
@@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs)
if (revs->exclude_promisor_objects) {
for_each_packed_object(revs->repo, mark_uninteresting, revs,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
}
if (!revs->reflog_info)
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 02/14] odb: fix flags parameter to be unsigned
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 03/14] object-file: extract function to read object info from path Patrick Steinhardt
` (15 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
The `flags` parameter accepted by various `for_each_object()` functions
is a bitfield of multiple flags. Such parameters are typically unsigned
in the Git codebase, but we use `enum odb_for_each_object_flags` in
some places.
Adapt these function signatures to use the correct type.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 3 ++-
object-file.h | 3 ++-
packfile.c | 4 ++--
packfile.h | 4 ++--
4 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/object-file.c b/object-file.c
index 64e9e239dc..8fa461dd59 100644
--- a/object-file.c
+++ b/object-file.c
@@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags)
+ struct object_info *oi,
+ unsigned flags)
{
int ret;
int fd;
diff --git a/object-file.h b/object-file.h
index 42bb50e10c..2acf19fb91 100644
--- a/object-file.h
+++ b/object-file.h
@@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags);
+ struct object_info *oi,
+ unsigned flags);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
diff --git a/packfile.c b/packfile.c
index b65f0b43f1..79fe64a25b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
+ unsigned flags)
{
uint32_t i;
int r = 0;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags)
+ void *data, unsigned flags)
{
struct odb_source *source;
int r = 0;
diff --git a/packfile.h b/packfile.h
index 15551258bd..447c44c4a7 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum odb_for_each_object_flags flags);
+ unsigned flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags);
+ void *data, unsigned flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 03/14] object-file: extract function to read object info from path
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 18:31 ` Justin Tobler
2026-01-20 9:09 ` Karthik Nayak
2026-01-15 11:04 ` [PATCH 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
` (14 subsequent siblings)
17 siblings, 2 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
Extract a new function that allows us to read object info for a specific
loose object via a user-supplied path. This function will be used in a
subsequent commit.
Note that this also allows us to drop `stat_loose_object()`, which is
a simple wrapper around `odb_loose_path()` plus lstat(3p).
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 39 ++++++++++++++++-----------------------
1 file changed, 16 insertions(+), 23 deletions(-)
diff --git a/object-file.c b/object-file.c
index 8fa461dd59..a651129426 100644
--- a/object-file.c
+++ b/object-file.c
@@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
}
/*
- * Find "oid" as a loose object in given source.
- * Returns 0 on success, negative on failure.
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to stat_loose_object().
*/
-static int stat_loose_object(struct odb_source_loose *loose,
- const struct object_id *oid,
- struct stat *st, const char **path)
-{
- static struct strbuf buf = STRBUF_INIT;
-
- *path = odb_loose_path(loose->source, &buf, oid);
- if (!lstat(*path, st))
- return 0;
-
- return -1;
-}
-
-/*
- * Like stat_loose_object(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
@@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
-int odb_source_loose_read_object_info(struct odb_source *source,
+static int read_object_info_from_path(struct odb_source *source,
+ const char *path,
const struct object_id *oid,
struct object_info *oi,
unsigned flags)
@@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source,
int ret;
int fd;
unsigned long mapsize;
- const char *path;
void *map = NULL;
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
@@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
+ if (lstat(path, &st) < 0) {
ret = -1;
goto out;
}
@@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- fd = open_loose_object(source->loose, oid, &path);
+ fd = git_open(path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
@@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
return ret;
}
+int odb_source_loose_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ odb_loose_path(source, &buf, oid);
+ return read_object_info_from_path(source, buf.buf, oid, oi, flags);
+}
+
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH 03/14] object-file: extract function to read object info from path
2026-01-15 11:04 ` [PATCH 03/14] object-file: extract function to read object info from path Patrick Steinhardt
@ 2026-01-15 18:31 ` Justin Tobler
2026-01-16 7:03 ` Patrick Steinhardt
2026-01-20 9:09 ` Karthik Nayak
1 sibling, 1 reply; 120+ messages in thread
From: Justin Tobler @ 2026-01-15 18:31 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> Extract a new function that allows us to read object info for a specific
> loose object via a user-supplied path. This function will be used in a
> subsequent commit.
Ok, I assume that the new function we are talking about here is
read_object_info_from_path(). This new function does the same thing as
the previous version of odb_source_loose_read_object_info(), but now
requires the path to be provided.
> Note that this also allows us to drop `stat_loose_object()`, which is
> a simple wrapper around `odb_loose_path()` plus lstat(3p).
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> object-file.c | 39 ++++++++++++++++-----------------------
> 1 file changed, 16 insertions(+), 23 deletions(-)
>
> diff --git a/object-file.c b/object-file.c
> index 8fa461dd59..a651129426 100644
> --- a/object-file.c
> +++ b/object-file.c
[snip]
> @@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> goto out;
> }
>
> - fd = open_loose_object(source->loose, oid, &path);
> + fd = git_open(path);
Here we already have the path, so there is no need to invoke
odb_loose_path() again via open_loose_object(). We can instead call
git_open() directly. Looks good.
If I understand correctly, even before this change the path was already
available so using open_loose_object() here was already redundant.
> if (fd < 0) {
> if (errno != ENOENT)
> error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
> @@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> return ret;
> }
>
> +int odb_source_loose_read_object_info(struct odb_source *source,
> + const struct object_id *oid,
> + struct object_info *oi,
> + unsigned flags)
> +{
> + static struct strbuf buf = STRBUF_INIT;
> + odb_loose_path(source, &buf, oid);
> + return read_object_info_from_path(source, buf.buf, oid, oi, flags);
Looks good.
-Justin
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 03/14] object-file: extract function to read object info from path
2026-01-15 18:31 ` Justin Tobler
@ 2026-01-16 7:03 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-16 7:03 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Thu, Jan 15, 2026 at 12:31:13PM -0600, Justin Tobler wrote:
> On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > diff --git a/object-file.c b/object-file.c
> > index 8fa461dd59..a651129426 100644
> > --- a/object-file.c
> > +++ b/object-file.c
> [snip]
> > @@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> > goto out;
> > }
> >
> > - fd = open_loose_object(source->loose, oid, &path);
> > + fd = git_open(path);
>
> Here we already have the path, so there is no need to invoke
> odb_loose_path() again via open_loose_object(). We can instead call
> git_open() directly. Looks good.
>
> If I understand correctly, even before this change the path was already
> available so using open_loose_object() here was already redundant.
It actually wasn't. `open_loose_object()` was responsible for calling
`odb_loose_path()`, and that path was then also assigned to the out
pointer.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 03/14] object-file: extract function to read object info from path
2026-01-15 11:04 ` [PATCH 03/14] object-file: extract function to read object info from path Patrick Steinhardt
2026-01-15 18:31 ` Justin Tobler
@ 2026-01-20 9:09 ` Karthik Nayak
1 sibling, 0 replies; 120+ messages in thread
From: Karthik Nayak @ 2026-01-20 9:09 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 3968 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> Extract a new function that allows us to read object info for a specific
> loose object via a user-supplied path. This function will be used in a
> subsequent commit.
>
> Note that this also allows us to drop `stat_loose_object()`, which is
> a simple wrapper around `odb_loose_path()` plus lstat(3p).
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> object-file.c | 39 ++++++++++++++++-----------------------
> 1 file changed, 16 insertions(+), 23 deletions(-)
>
> diff --git a/object-file.c b/object-file.c
> index 8fa461dd59..a651129426 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
> }
>
> /*
> - * Find "oid" as a loose object in given source.
> - * Returns 0 on success, negative on failure.
> + * Find "oid" as a loose object in given source, open the object and return its
> + * file descriptor. Returns the file descriptor on success, negative on failure.
> *
> * The "path" out-parameter will give the path of the object we found (if any).
> * Note that it may point to static storage and is only valid until another
> * call to stat_loose_object().
> */
> -static int stat_loose_object(struct odb_source_loose *loose,
> - const struct object_id *oid,
> - struct stat *st, const char **path)
> -{
> - static struct strbuf buf = STRBUF_INIT;
> -
> - *path = odb_loose_path(loose->source, &buf, oid);
> - if (!lstat(*path, st))
> - return 0;
> -
> - return -1;
> -}
> -
> -/*
> - * Like stat_loose_object(), but actually open the object and return the
> - * descriptor. See the caveats on the "path" parameter above.
> - */
> static int open_loose_object(struct odb_source_loose *loose,
> const struct object_id *oid, const char **path)
> {
> @@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
> return 0;
> }
>
> -int odb_source_loose_read_object_info(struct odb_source *source,
> +static int read_object_info_from_path(struct odb_source *source,
> + const char *path,
> const struct object_id *oid,
> struct object_info *oi,
> unsigned flags)
> @@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> int ret;
> int fd;
> unsigned long mapsize;
> - const char *path;
> void *map = NULL;
> git_zstream stream, *stream_to_end = NULL;
> char hdr[MAX_HEADER_LEN];
> @@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> goto out;
> }
>
> - if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
> + if (lstat(path, &st) < 0) {
> ret = -1;
> goto out;
> }
> @@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> goto out;
> }
>
> - fd = open_loose_object(source->loose, oid, &path);
Okay, so with this change, there's only one user of
`open_loose_object()` left. I don't see any cleanups needed there.
> + fd = git_open(path);
> if (fd < 0) {
> if (errno != ENOENT)
> error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
> @@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
> return ret;
> }
>
> +int odb_source_loose_read_object_info(struct odb_source *source,
> + const struct object_id *oid,
> + struct object_info *oi,
> + unsigned flags)
> +{
> + static struct strbuf buf = STRBUF_INIT;
> + odb_loose_path(source, &buf, oid);
> + return read_object_info_from_path(source, buf.buf, oid, oi, flags);
> +}
> +
I was a bit confused why we extracted odb_loose_path() out, but that
should be explained in the next commit.
Looks good.
> static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
> const void *buf, unsigned long len,
> struct object_id *oid,
>
> --
> 2.52.0.660.gd05f3a8ea5.dirty
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH 04/14] object-file: introduce function to iterate through objects
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (2 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 03/14] object-file: extract function to read object info from path Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 20:54 ` Justin Tobler
2026-01-20 9:16 ` Karthik Nayak
2026-01-15 11:04 ` [PATCH 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
` (13 subsequent siblings)
17 siblings, 2 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
We have multiple divergent interfaces to iterate through objects of a
specific backend:
- `for_each_loose_object()` yields all loose objects.
- `for_each_packed_object()` (somewhat obviously) yields all packed
objects.
These functions have different function signatures, which makes it hard
to create a common abstraction layer that covers both of these.
Introduce a new function `odb_source_loose_for_each_object()` to plug
this gap. This function doesn't take any data specific to loose objects,
but instead it accepts a `struct object_info` that will be populated
exactly as if `odb_source_loose_read_object_info()` was called.
The benefit of this new interface is that we can continue to pass
backend-specific data, as `struct object_info` contains a union for
these exact use cases. This will allow us to unify how we iterate
through objects across both loose and packed objects in a subsequent
commit.
The `for_each_loose_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++
object-file.h | 11 +++++++++++
odb.h | 12 ++++++++++++
3 files changed, 64 insertions(+)
diff --git a/object-file.c b/object-file.c
index a651129426..65e730684b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1801,6 +1801,47 @@ int for_each_loose_object(struct object_database *odb,
return 0;
}
+struct for_each_object_wrapper_data {
+ struct odb_source *source;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+ const char *path,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
+ if (data->oi &&
+ read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
+ return -1;
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+
+ /* There are no loose promisor objects, so we can return immediately. */
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+ return 0;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+ return 0;
+
+ return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+ NULL, NULL, &data);
+}
+
static int append_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
diff --git a/object-file.h b/object-file.h
index 2acf19fb91..048b778531 100644
--- a/object-file.h
+++ b/object-file.h
@@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
enum odb_for_each_object_flags flags);
+/*
+ * Iterate through all loose objects in the given object database source and
+ * invoke the callback function for each of them. If given, the object info
+ * will be populated with the object's data as if you had called
+ * `odb_source_loose_read_object_info()` on the object.
+ */
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/**
* format_object_header() is a thin wrapper around s xsnprintf() that
diff --git a/odb.h b/odb.h
index 74503addf1..f97f249580 100644
--- a/odb.h
+++ b/odb.h
@@ -463,6 +463,18 @@ enum odb_for_each_object_flags {
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated the same as if you would call
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH 04/14] object-file: introduce function to iterate through objects
2026-01-15 11:04 ` [PATCH 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-15 20:54 ` Justin Tobler
2026-01-16 7:03 ` Patrick Steinhardt
2026-01-20 9:16 ` Karthik Nayak
1 sibling, 1 reply; 120+ messages in thread
From: Justin Tobler @ 2026-01-15 20:54 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> We have multiple divergent interfaces to iterate through objects of a
> specific backend:
>
> - `for_each_loose_object()` yields all loose objects.
>
> - `for_each_packed_object()` (somewhat obviously) yields all packed
> objects.
>
> These functions have different function signatures, which makes it hard
> to create a common abstraction layer that covers both of these.
I assume that the intention is to eventually have a generic
for_each_object() function that can iterate across objects regardless of
the source. Is the end goal to have each source define the appropriate
for_each_object callback?
> Introduce a new function `odb_source_loose_for_each_object()` to plug
> this gap. This function doesn't take any data specific to loose objects,
> but instead it accepts a `struct object_info` that will be populated the
> exact same as if `odb_source_loose_read_object()` was called.
>
> The benefit of this new interface is that we can continue to pass
> backend-specific data, as `struct object_info` contains a union for
> these exact use cases. This will allow us to unify how we iterate
> through objects across both loose and packed objects in a subsequent
> commit.
Naive question: in a future where we have additional ODB backends, does
this mean that `struct object_info` would also need to be updated to
include them?
> The `for_each_loose_object()` function continues to exist for now, but
> it will be removed at the end of this patch series.
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 04/14] object-file: introduce function to iterate through objects
2026-01-15 20:54 ` Justin Tobler
@ 2026-01-16 7:03 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-16 7:03 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Thu, Jan 15, 2026 at 02:54:23PM -0600, Justin Tobler wrote:
> On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > We have multiple divergent interfaces to iterate through objects of a
> > specific backend:
> >
> > - `for_each_loose_object()` yields all loose objects.
> >
> > - `for_each_packed_object()` (somewhat obviously) yields all packed
> > objects.
> >
> > These functions have different function signatures, which makes it hard
> > to create a common abstraction layer that covers both of these.
>
> I assume that the intention is to eventually have a generic
> for_each_object() function that can iterate across objects regardless of
> the source. Is the end goal to have each source define the appropriate
> for_each_object callback?
Yup.
> > Introduce a new function `odb_source_loose_for_each_object()` to plug
> > this gap. This function doesn't take any data specific to loose objects,
> > but instead it accepts a `struct object_info` that will be populated the
> > exact same as if `odb_source_loose_read_object()` was called.
> >
> > The benefit of this new interface is that we can continue to pass
> > backend-specific data, as `struct object_info` contains a union for
> > these exact use cases. This will allow us to unify how we iterate
> > through objects across both loose and packed objects in a subsequent
> > commit.
>
> Naive question: in a future where we have additional ODB backends, does
> this mean that `struct object_info` would also need to be updated to
> include them?
Yes. We'd introduce a new `OI_*` type to signify the specific backend
via the `whence` field and will (optionally) have a new member in the
union of backend-specific data.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 04/14] object-file: introduce function to iterate through objects
2026-01-15 11:04 ` [PATCH 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
2026-01-15 20:54 ` Justin Tobler
@ 2026-01-20 9:16 ` Karthik Nayak
1 sibling, 0 replies; 120+ messages in thread
From: Karthik Nayak @ 2026-01-20 9:16 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 2242 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> We have multiple divergent interfaces to iterate through objects of a
> specific backend:
>
> - `for_each_loose_object()` yields all loose objects.
>
> - `for_each_packed_object()` (somewhat obviously) yields all packed
> objects.
>
> These functions have different function signatures, which makes it hard
> to create a common abstraction layer that covers both of these.
>
> Introduce a new function `odb_source_loose_for_each_object()` to plug
> this gap. This function doesn't take any data specific to loose objects,
> but instead it accepts a `struct object_info` that will be populated the
> exact same as if `odb_source_loose_read_object()` was called.
>
> The benefit of this new interface is that we can continue to pass
> backend-specific data, as `struct object_info` contains a union for
> these exact use cases. This will allow us to unify how we iterate
> through objects across both loose and packed objects in a subsequent
> commit.
>
> The `for_each_loose_object()` function continues to exist for now, but
> it will be removed at the end of this patch series.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++
> object-file.h | 11 +++++++++++
> odb.h | 12 ++++++++++++
> 3 files changed, 64 insertions(+)
>
> diff --git a/object-file.c b/object-file.c
> index a651129426..65e730684b 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1801,6 +1801,47 @@ int for_each_loose_object(struct object_database *odb,
> return 0;
> }
>
> +struct for_each_object_wrapper_data {
> + struct odb_source *source;
> + struct object_info *oi;
> + odb_for_each_object_cb cb;
> + void *cb_data;
> +};
> +
> +static int for_each_object_wrapper_cb(const struct object_id *oid,
> + const char *path,
> + void *cb_data)
> +{
> + struct for_each_object_wrapper_data *data = cb_data;
> + if (data->oi &&
> + read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
> + return -1;
> + return data->cb(oid, data->oi, data->cb_data);
> +}
Okay, so here we use `read_object_info_from_path()`: since we already
have the path, we don't need to call `odb_loose_path()`.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH 05/14] packfile: extract function to iterate through objects of a store
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (3 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
` (12 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
In the next commit we're about to introduce a new function that knows to
iterate through objects of a given packfile store. Same as with the
equivalent function for loose objects, this new function will also be
agnostic of backends by using a `struct object_info`.
Prepare for this by extracting a new shared function to iterate through
a single packfile store.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 78 ++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 45 insertions(+), 33 deletions(-)
diff --git a/packfile.c b/packfile.c
index 79fe64a25b..d15a2ce12b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
+static int packfile_store_for_each_object_internal(struct packfile_store *store,
+ each_packed_object_fn cb,
+ void *data,
+ unsigned flags,
+ int *pack_errors)
{
- struct odb_source *source;
- int r = 0;
- int pack_errors = 0;
+ struct packfile_list_entry *e;
+ int ret = 0;
- odb_prepare_alternates(repo->objects);
+ store->skip_mru_updates = true;
- for (source = repo->objects->sources; source; source = source->next) {
- struct packfile_list_entry *e;
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
- source->packfiles->skip_mru_updates = true;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ *pack_errors = 1;
+ continue;
+ }
- for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
- struct packed_git *p = e->pack;
+ ret = for_each_object_in_pack(p, cb, data, flags);
+ if (ret)
+ break;
+ }
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- pack_errors = 1;
- continue;
- }
+ store->skip_mru_updates = false;
- r = for_each_object_in_pack(p, cb, data, flags);
- if (r)
- break;
- }
+ return ret;
+}
- source->packfiles->skip_mru_updates = false;
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, unsigned flags)
+{
+ struct odb_source *source;
+ int pack_errors = 0;
+ int ret = 0;
- if (r)
+ odb_prepare_alternates(repo->objects);
+
+ for (source = repo->objects->sources; source; source = source->next) {
+ ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
+ flags, &pack_errors);
+ if (ret)
break;
}
- return r ? r : pack_errors;
+ return ret ? ret : pack_errors;
}
static int add_promisor_object(const struct object_id *oid,
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 06/14] packfile: introduce function to iterate through objects
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (4 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (11 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
Introduce a new function `packfile_store_for_each_object()`. This
function is the equivalent to `odb_source_loose_for_each_object()` in
that it:
- Works on a single packfile store and thus per object source.
- Passes a `struct object_info` to the callback function.
As such, it provides the same callback interface as we already provide
for loose objects now. These functions will be used in a subsequent step
to implement `odb_for_each_object()`.
The `for_each_packed_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
packfile.h | 14 ++++++++++++++
2 files changed, 62 insertions(+)
diff --git a/packfile.c b/packfile.c
index d15a2ce12b..cd45c6f21c 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2360,6 +2360,54 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
return ret ? ret : pack_errors;
}
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t index_pos,
+ void *cb_data)
+{
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+
+ if (data->oi) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
+
+ if (packed_object_info(pack, offset, data->oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
+ }
+
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+ int pack_errors = 0, ret;
+
+ ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
+ &data, flags, &pack_errors);
+ if (ret)
+ return ret;
+
+ return pack_errors ? -1 : 0;
+}
+
static int add_promisor_object(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos UNUSED,
diff --git a/packfile.h b/packfile.h
index 447c44c4a7..ab0637fbe9 100644
--- a/packfile.h
+++ b/packfile.h
@@ -343,6 +343,20 @@ int for_each_object_in_pack(struct packed_git *p,
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
void *data, unsigned flags);
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
+ * the callback function for each of them. If given, the object info will be
+ * populated with the object's data as if you had called
+ * `packfile_store_read_object_info()` on the object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
#define PACKDIR_FILE_IDX 2
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (5 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 21:17 ` Justin Tobler
` (2 more replies)
2026-01-15 11:04 ` [PATCH 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
` (10 subsequent siblings)
17 siblings, 3 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
Introduce a new function `odb_for_each_object()` that knows to iterate
through all objects part of a given object database. This function is
essentially a simple wrapper around the object database sources.
Subsequent commits will adapt callers to use this new function.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
odb.c | 27 +++++++++++++++++++++++++++
odb.h | 17 +++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/odb.c b/odb.c
index ac70b6a099..65f0447aa5 100644
--- a/odb.c
+++ b/odb.c
@@ -995,6 +995,33 @@ int odb_freshen_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ int ret;
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ continue;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
diff --git a/odb.h b/odb.h
index f97f249580..8f6d95aee5 100644
--- a/odb.h
+++ b/odb.h
@@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
struct object_info *oi,
void *cb_data);
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 ` [PATCH 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 21:17 ` Justin Tobler
2026-01-16 7:03 ` Patrick Steinhardt
2026-01-16 17:46 ` Justin Tobler
2026-01-20 9:20 ` Karthik Nayak
2 siblings, 1 reply; 120+ messages in thread
From: Justin Tobler @ 2026-01-15 21:17 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> Introduce a new function `odb_for_each_object()` that knows to iterate
> through all objects part of a given object database. This function is
> essentially a simple wrapper around the object database sources.
>
> Subsequent commits will adapt callers to use this new function.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> odb.c | 27 +++++++++++++++++++++++++++
> odb.h | 17 +++++++++++++++++
> 2 files changed, 44 insertions(+)
>
> diff --git a/odb.c b/odb.c
> index ac70b6a099..65f0447aa5 100644
> --- a/odb.c
> +++ b/odb.c
> @@ -995,6 +995,33 @@ int odb_freshen_object(struct object_database *odb,
> return 0;
> }
>
> +int odb_for_each_object(struct object_database *odb,
> + struct object_info *oi,
> + odb_for_each_object_cb cb,
> + void *cb_data,
> + unsigned flags)
> +{
> + int ret;
> +
> + odb_prepare_alternates(odb);
> + for (struct odb_source *source = odb->sources; source; source = source->next) {
> + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
> + continue;
> +
> + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
> + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
> + if (ret)
> + return ret;
> + }
> +
> + ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
> +}
Ok, I think I understand a bit more clearly now. As implemented here,
odb_for_each_object() iterates across each of the objects (loose and
packed) in each source. Object iteration is not handled transparently
for each source yet though and we still explicitly iterate both loose
and packed objects. If I understand correctly, this current
implementation will become specific to the "files" backend/source in the
future.
-Justin
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-15 21:17 ` Justin Tobler
@ 2026-01-16 7:03 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-16 7:03 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Thu, Jan 15, 2026 at 03:17:07PM -0600, Justin Tobler wrote:
> On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > diff --git a/odb.c b/odb.c
> > index ac70b6a099..65f0447aa5 100644
> > --- a/odb.c
> > +++ b/odb.c
> > @@ -995,6 +995,33 @@ int odb_freshen_object(struct object_database *odb,
> > return 0;
> > }
> >
> > +int odb_for_each_object(struct object_database *odb,
> > + struct object_info *oi,
> > + odb_for_each_object_cb cb,
> > + void *cb_data,
> > + unsigned flags)
> > +{
> > + int ret;
> > +
> > + odb_prepare_alternates(odb);
> > + for (struct odb_source *source = odb->sources; source; source = source->next) {
> > + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
> > + continue;
> > +
> > + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
> > + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
> > + if (ret)
> > + return ret;
> > + }
> > +
> > + ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
> > + if (ret)
> > + return ret;
> > + }
> > +
> > + return 0;
> > +}
>
> Ok, I think I understand a bit more clearly now. As implemented here,
> odb_for_each_object() iterates across each of the objects (loose and
> packed) in each source. Object iteration is not handled transparently
> for each source yet though and we still explicitly iterate both loose
> and packed objects. If I understand correctly, this current
> implementation will become specific to the "files" backend/source in the
> future.
Yup, that's correct :)
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 ` [PATCH 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-15 21:17 ` Justin Tobler
@ 2026-01-16 17:46 ` Justin Tobler
2026-01-19 7:10 ` Patrick Steinhardt
2026-01-20 9:20 ` Karthik Nayak
2 siblings, 1 reply; 120+ messages in thread
From: Justin Tobler @ 2026-01-16 17:46 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> diff --git a/odb.h b/odb.h
> index f97f249580..8f6d95aee5 100644
> --- a/odb.h
> +++ b/odb.h
> @@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
> struct object_info *oi,
> void *cb_data);
>
> +/*
> + * Iterate through all objects contained in the object database. Note that
> + * objects may be iterated over multiple times in case they are either stored
> + * in different backends or in case they are stored in multiple sources.
> + *
> + * Returning a non-zero error code will cause iteration to abort. The error
> + * code will be propagated.
> + *
> + * Returns 0 on success, a negative error code in case a failure occurred, or
> + * an arbitrary non-zero error code returned by the callback itself.
> + */
> +int odb_for_each_object(struct object_database *odb,
> + struct object_info *oi,
Something I probably don't fully understand yet is the role of `struct
object_info` being passed in here by `odb_for_each_object()` callers.
Outside of configuring the specific object info attributes that are
needed for a given callback, is there a reason that callers would care
about the data that gets populated in it? I was under the impression
that this object info was really only needed for the internal
`odb_for_each_object_cb` that gets invoked.
> + odb_for_each_object_cb cb,
> + void *cb_data,
> + unsigned flags);
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-16 17:46 ` Justin Tobler
@ 2026-01-19 7:10 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-19 7:10 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Fri, Jan 16, 2026 at 11:46:12AM -0600, Justin Tobler wrote:
> On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > diff --git a/odb.h b/odb.h
> > index f97f249580..8f6d95aee5 100644
> > --- a/odb.h
> > +++ b/odb.h
> > @@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
> > struct object_info *oi,
> > void *cb_data);
> >
> > +/*
> > + * Iterate through all objects contained in the object database. Note that
> > + * objects may be iterated over multiple times in case they are either stored
> > + * in different backends or in case they are stored in multiple sources.
> > + *
> > + * Returning a non-zero error code will cause iteration to abort. The error
> > + * code will be propagated.
> > + *
> > + * Returns 0 on success, a negative error code in case a failure occurred, or
> > + * an arbitrary non-zero error code returned by the callback itself.
> > + */
> > +int odb_for_each_object(struct object_database *odb,
> > + struct object_info *oi,
>
> Something I probably don't fully understand yet is the role of `struct
> object_info` being passed in here by `odb_for_each_object()` callers.
> Outside of configuring the specific object info attributes that are
> needed for a given callback, is there a reason that callers would care
> about the data that gets populated in it? I was under the impression
> that this object info was really only needed for the internal
> `odb_for_each_object_cb` that gets invoked.
Some callers do care about this info. We see this later in the series
where they for example want to learn about the mtime of each of the
iterated objects, but we also have other cases where we want to for
example sum up the size of all objects.
Another use case for passing `struct object_info` is so that the caller
can tell apart which backend an object is coming from via the `whence`
field.
Apart from that there's also good reason to keep the current layout. For
the packfile backend for example it's significantly cheaper to iterate
and look up object info at the same time compared to iterating and then
calling `odb_read_object_info()` for each individual object. We already
have the information available when iterating, so it's just a matter of
also populating the object info with it in case the caller needs it.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 ` [PATCH 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-15 21:17 ` Justin Tobler
2026-01-16 17:46 ` Justin Tobler
@ 2026-01-20 9:20 ` Karthik Nayak
2026-01-21 7:39 ` Patrick Steinhardt
2 siblings, 1 reply; 120+ messages in thread
From: Karthik Nayak @ 2026-01-20 9:20 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 1211 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
[snip]
> diff --git a/odb.h b/odb.h
> index f97f249580..8f6d95aee5 100644
> --- a/odb.h
> +++ b/odb.h
> @@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
> struct object_info *oi,
> void *cb_data);
>
> +/*
> + * Iterate through all objects contained in the object database. Note that
> + * objects may be iterated over multiple times in case they are either stored
> + * in different backends or in case they are stored in multiple sources.
> + *
> + * Returning a non-zero error code will cause iteration to abort. The error
> + * code will be propagated.
> + *
Super-Nit: This is for the callback function. It would be nice to be
explicit about that.
> + * Returns 0 on success, a negative error code in case a failure occurred, or
> + * an arbitrary non-zero error code returned by the callback itself.
> + */
> +int odb_for_each_object(struct object_database *odb,
> + struct object_info *oi,
> + odb_for_each_object_cb cb,
> + void *cb_data,
> + unsigned flags);
> +
> enum {
> /*
> * By default, `odb_write_object()` does not actually write anything
>
> --
> 2.52.0.660.gd05f3a8ea5.dirty
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 07/14] odb: introduce `odb_for_each_object()`
2026-01-20 9:20 ` Karthik Nayak
@ 2026-01-21 7:39 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 7:39 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Tue, Jan 20, 2026 at 09:20:05AM +0000, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> > diff --git a/odb.h b/odb.h
> > index f97f249580..8f6d95aee5 100644
> > --- a/odb.h
> > +++ b/odb.h
> > @@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
> > struct object_info *oi,
> > void *cb_data);
> >
> > +/*
> > + * Iterate through all objects contained in the object database. Note that
> > + * objects may be iterated over multiple times in case they are either stored
> > + * in different backends or in case they are stored in multiple sources.
> > + *
> > + * Returning a non-zero error code will cause iteration to abort. The error
> > + * code will be propagated.
> > + *
>
> Super-Nit: This is for the callback function. It would be nice to be
> explicit about that.
Makes sense indeed, will change.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (6 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 21:24 ` Justin Tobler
2026-01-15 11:04 ` [PATCH 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
` (9 subsequent siblings)
17 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
In git-fsck(1) we have two callsites where we iterate over all objects
via `for_each_loose_object()` and `for_each_packed_object()`. Both of
these are trivially convertible with `odb_for_each_object()`.
Refactor these callsites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/fsck.c | 57 ++++++++++++---------------------------------------------
1 file changed, 12 insertions(+), 45 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4979bc795e..96107695ae 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
return 0;
}
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
+ struct object_info *io UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
- return; /* not part of our original set */
+ return 0; /* not part of our original set */
if (obj->flags & REACHABLE)
- return; /* reachable objects already traversed */
+ return 0; /* reachable objects already traversed */
/*
* Avoid passing OBJ_NONE to fsck_walk, which will parse the object
@@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
-}
-static int mark_loose_unreachable_referents(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
- return 0;
-}
-
-static int mark_packed_unreachable_referents(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
return 0;
}
@@ -394,12 +381,8 @@ static void check_connectivity(void)
* and ignore any that weren't present in our earlier
* traversal.
*/
- for_each_loose_object(the_repository->objects,
- mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_unreachable_referents,
- NULL,
- 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_unreachable_referents, NULL, 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
fsck_resolve_undo(istate, index_path);
}
-static void mark_object_for_connectivity(const struct object_id *oid)
+static int mark_object_for_connectivity(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data UNUSED)
{
struct object *obj = lookup_unknown_object(the_repository, oid);
obj->flags |= HAS_OBJ;
-}
-
-static int mark_loose_for_connectivity(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
- return 0;
-}
-
-static int mark_packed_for_connectivity(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
return 0;
}
@@ -1001,10 +970,8 @@ int cmd_fsck(int argc,
fsck_refs(the_repository);
if (connectivity_only) {
- for_each_loose_object(the_repository->objects,
- mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_for_connectivity, NULL, 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_object_for_connectivity, NULL, 0);
} else {
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next)
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-15 11:04 ` [PATCH 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 21:24 ` Justin Tobler
0 siblings, 0 replies; 120+ messages in thread
From: Justin Tobler @ 2026-01-15 21:24 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> In git-fsck(1) we have two callsites where we iterate over all objects
> via `for_each_loose_object()` and `for_each_packed_object()`. Both of
> these are trivially convertible with `odb_for_each_object()`.
>
> Refactor these callsites accordingly.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/fsck.c | 57 ++++++++++++---------------------------------------------
> 1 file changed, 12 insertions(+), 45 deletions(-)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index 4979bc795e..96107695ae 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
> return 0;
> }
>
> -static void mark_unreachable_referents(const struct object_id *oid)
> +static int mark_unreachable_referents(const struct object_id *oid,
> + struct object_info *io UNUSED,
> + void *data UNUSED)
> {
> struct fsck_options options = FSCK_OPTIONS_DEFAULT;
> struct object *obj = lookup_object(the_repository, oid);
>
> if (!obj || !(obj->flags & HAS_OBJ))
> - return; /* not part of our original set */
> + return 0; /* not part of our original set */
> if (obj->flags & REACHABLE)
> - return; /* reachable objects already traversed */
> + return 0; /* reachable objects already traversed */
>
> /*
> * Avoid passing OBJ_NONE to fsck_walk, which will parse the object
> @@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
> fsck_walk(obj, NULL, &options);
> if (obj->type == OBJ_TREE)
> free_tree_buffer((struct tree *)obj);
> -}
>
> -static int mark_loose_unreachable_referents(const struct object_id *oid,
> - const char *path UNUSED,
> - void *data UNUSED)
> -{
> - mark_unreachable_referents(oid);
> - return 0;
> -}
> -
> -static int mark_packed_unreachable_referents(const struct object_id *oid,
> - struct packed_git *pack UNUSED,
> - uint32_t pos UNUSED,
> - void *data UNUSED)
> -{
> - mark_unreachable_referents(oid);
Ah ok, now that object iteration is unified, we don't need the two
separate callbacks. Makes sense. :)
> return 0;
> }
>
> @@ -394,12 +381,8 @@ static void check_connectivity(void)
> * and ignore any that weren't present in our earlier
> * traversal.
> */
> - for_each_loose_object(the_repository->objects,
> - mark_loose_unreachable_referents, NULL, 0);
> - for_each_packed_object(the_repository,
> - mark_packed_unreachable_referents,
> - NULL,
> - 0);
> + odb_for_each_object(the_repository->objects, NULL,
> + mark_unreachable_referents, NULL, 0);
Nice! Now we no longer have to explicitly handle the various object
backends while iterating. This patch looks good.
-Justin
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH 09/14] treewide: enumerate promisor objects via `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (7 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
` (8 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
We have multiple callsites where we enumerate all promisor objects in
the object database via `for_each_packed_object()`. This is done by
passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to
skip over all non-promisor objects.
These callsites can be trivially converted to `odb_for_each_object()` as
we know to skip enumeration of loose objects in case the `PROMISOR_ONLY`
flag was passed by the caller.
Refactor the sites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 37 ++++++++++++++++++++++---------------
repack-promisor.c | 8 ++++----
revision.c | 10 ++++------
3 files changed, 30 insertions(+), 25 deletions(-)
diff --git a/packfile.c b/packfile.c
index cd45c6f21c..4f84bc19d9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2408,28 +2408,32 @@ int packfile_store_for_each_object(struct packfile_store *store,
return pack_errors ? -1 : 0;
}
+struct add_promisor_object_data {
+ struct repository *repo;
+ struct oidset *set;
+};
+
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos UNUSED,
- void *set_)
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
- struct oidset *set = set_;
+ struct add_promisor_object_data *data = cb_data;
struct object *obj;
int we_parsed_object;
- obj = lookup_object(pack->repo, oid);
+ obj = lookup_object(data->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object_with_flags(pack->repo, oid,
+ obj = parse_object_with_flags(data->repo, oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
}
if (!obj)
return 1;
- oidset_insert(set, oid);
+ oidset_insert(data->set, oid);
/*
* If this is a tree, commit, or tag, the objects it refers
@@ -2447,19 +2451,19 @@ static int add_promisor_object(const struct object_id *oid,
*/
return 0;
while (tree_entry_gently(&desc, &entry))
- oidset_insert(set, &entry.oid);
+ oidset_insert(data->set, &entry.oid);
if (we_parsed_object)
free_tree_buffer(tree);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, get_commit_tree_oid(commit));
+ oidset_insert(data->set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
- oidset_insert(set, &parents->item->object.oid);
+ oidset_insert(data->set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
- oidset_insert(set, get_tagged_oid(tag));
+ oidset_insert(data->set, get_tagged_oid(tag));
}
return 0;
}
@@ -2471,10 +2475,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(r)) {
- for_each_packed_object(r, add_promisor_object,
- &promisor_objects,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+ struct add_promisor_object_data data = {
+ .repo = r,
+ .set = &promisor_objects,
+ };
+
+ odb_for_each_object(r->objects, NULL, add_promisor_object, &data,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/repack-promisor.c b/repack-promisor.c
index 45c330b9a5..35c4073632 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -17,8 +17,8 @@ struct write_oid_context {
* necessary.
*/
static int write_oid(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED, void *data)
+ struct object_info *oi UNUSED,
+ void *data)
{
struct write_oid_context *ctx = data;
struct child_process *cmd = ctx->cmd;
@@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo,
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
- for_each_packed_object(repo, write_oid, &ctx,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
+ odb_for_each_object(repo->objects, NULL, write_oid, &ctx,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index 5aadf46dac..e34bcd8e88 100644
--- a/revision.c
+++ b/revision.c
@@ -3626,8 +3626,7 @@ void reset_revision_walk(void)
}
static int mark_uninteresting(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
+ struct object_info *oi UNUSED,
void *cb)
{
struct rev_info *revs = cb;
@@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs)
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
- if (revs->exclude_promisor_objects) {
- for_each_packed_object(revs->repo, mark_uninteresting, revs,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
- }
+ if (revs->exclude_promisor_objects)
+ odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting,
+ revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (8 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 21:44 ` Justin Tobler
2026-01-15 11:04 ` [PATCH 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
` (7 subsequent siblings)
17 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
We're using `for_each_loose_object()` and `for_each_packed_object()` at
a couple of callsites to enumerate all loose and packed objects,
respectively. These functions will be removed in a subsequent commit in
favor of the newly introduced `odb_source_loose_for_each_object()` and
`packfile_store_for_each_object()` replacements.
Prepare for this by refactoring the sites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 28 ++++++++++++++++++++++------
commit-graph.c | 44 +++++++++++++++++++++++++++++++-------------
2 files changed, 53 insertions(+), 19 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 6964a5a52c..7d16fbc1b8 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -806,11 +806,14 @@ struct for_each_object_payload {
void *payload;
};
-static int batch_one_object_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *_payload)
+static int batch_one_object_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *_payload)
{
struct for_each_object_payload *payload = _payload;
+ if (oi && oi->whence == OI_PACKED)
+ return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
+ payload->payload);
return payload->callback(oid, NULL, 0, payload->payload);
}
@@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt,
.payload = _payload,
};
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
+ &payload, flags);
+ if (ret)
+ break;
+ }
if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
batch_one_object_bitmapped, &payload)) {
@@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
&payload, flags);
}
} else {
- for_each_packed_object(the_repository, batch_one_object_packed,
- &payload, flags);
+ struct object_info oi = { 0 };
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = packfile_store_for_each_object(source->packfiles, &oi,
+ batch_one_object_oi, &payload, flags);
+ if (ret)
+ break;
+ }
}
free_bitmap_index(bitmap);
diff --git a/commit-graph.c b/commit-graph.c
index 7f1145a082..a3087d7883 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
+static int add_packed_commits_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *data)
+{
+ struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
+
+ if (ctx->progress)
+ display_progress(ctx->progress, ++ctx->progress_done);
+
+ if (*oi->typep != OBJ_COMMIT)
+ return 0;
+
+ oid_array_append(&ctx->oids, oid);
+ set_commit_pos(ctx->r, oid);
+
+ return 0;
+}
+
static int add_packed_commits(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
- struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
enum object_type type;
off_t offset = nth_packed_object_offset(pack, pos);
struct object_info oi = OBJECT_INFO_INIT;
- if (ctx->progress)
- display_progress(ctx->progress, ++ctx->progress_done);
-
oi.typep = &type;
if (packed_object_info(pack, offset, &oi) < 0)
die(_("unable to get type of object %s"), oid_to_hex(oid));
- if (type != OBJ_COMMIT)
- return 0;
-
- oid_array_append(&ctx->oids, oid);
- set_commit_pos(ctx->r, oid);
-
- return 0;
+ return add_packed_commits_oi(oid, &oi, data);
}
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
@@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
+ struct odb_source *source;
+ enum object_type type;
+ struct object_info oi = {
+ .typep = &type,
+ };
+
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
ctx->r,
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(ctx->r, add_packed_commits, ctx,
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
+ odb_prepare_alternates(ctx->r->objects);
+ for (source = ctx->r->objects->sources; source; source = source->next)
+ packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi,
+ ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-15 11:04 ` [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
@ 2026-01-15 21:44 ` Justin Tobler
2026-01-16 7:03 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Justin Tobler @ 2026-01-15 21:44 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> We're using `for_each_loose_object()` and `for_each_packed_object()` at
> a couple of callsites to enumerate all loose and packed objects,
> respectively. These functions will be removed in a subsequent commit in
> favor of the newly introduced `odb_source_loose_for_each_object()` and
> `packfile_store_for_each_object()` replacements.
>
> Prepare for this by refactoring the sites accordingly.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/cat-file.c | 28 ++++++++++++++++++++++------
> commit-graph.c | 44 +++++++++++++++++++++++++++++++-------------
> 2 files changed, 53 insertions(+), 19 deletions(-)
>
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index 6964a5a52c..7d16fbc1b8 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -806,11 +806,14 @@ struct for_each_object_payload {
> void *payload;
> };
>
> -static int batch_one_object_loose(const struct object_id *oid,
> - const char *path UNUSED,
> - void *_payload)
> +static int batch_one_object_oi(const struct object_id *oid,
> + struct object_info *oi,
> + void *_payload)
> {
> struct for_each_object_payload *payload = _payload;
> + if (oi && oi->whence == OI_PACKED)
> + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
> + payload->payload);
> return payload->callback(oid, NULL, 0, payload->payload);
> }
>
> @@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt,
> .payload = _payload,
> };
> struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
> + struct odb_source *source;
>
> - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
> + odb_prepare_alternates(the_repository->objects);
> + for (source = the_repository->objects->sources; source; source = source->next) {
> + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
> + &payload, flags);
> + if (ret)
> + break;
> + }
>
> if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
> batch_one_object_bitmapped, &payload)) {
> @@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
> &payload, flags);
> }
> } else {
> - for_each_packed_object(the_repository, batch_one_object_packed,
> - &payload, flags);
> + struct object_info oi = { 0 };
> +
> + for (source = the_repository->objects->sources; source; source = source->next) {
> + int ret = packfile_store_for_each_object(source->packfiles, &oi,
> + batch_one_object_oi, &payload, flags);
> + if (ret)
> + break;
> + }
Huh, I was a bit surprised to see that we are still handling object
iteration in a backend specific manner here. I would assume ideally we
would want to transparently iterate across objects wherever possible. I
assume the reason here has something to do with how iteration is handled
with bitmaps?
-Justin
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-15 21:44 ` Justin Tobler
@ 2026-01-16 7:03 ` Patrick Steinhardt
2026-01-16 17:47 ` Justin Tobler
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-16 7:03 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Thu, Jan 15, 2026 at 03:44:50PM -0600, Justin Tobler wrote:
> On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> > index 6964a5a52c..7d16fbc1b8 100644
> > --- a/builtin/cat-file.c
> > +++ b/builtin/cat-file.c
> > @@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
> > &payload, flags);
> > }
> > } else {
> > - for_each_packed_object(the_repository, batch_one_object_packed,
> > - &payload, flags);
> > + struct object_info oi = { 0 };
> > +
> > + for (source = the_repository->objects->sources; source; source = source->next) {
> > + int ret = packfile_store_for_each_object(source->packfiles, &oi,
> > + batch_one_object_oi, &payload, flags);
> > + if (ret)
> > + break;
> > + }
>
> Huh, I was a bit surprised to see that we are still handling object
> iteration in a backend specific manner here. I would assume ideally we
> would want to transparently iterate across objects wherever possible. I
> assume the reason here has something to do with how iteration is handled
> with bitmaps?
Exactly. I was pondering a bit over whether or not I should invest a bit
more time to also make this part here generic. But I felt like the patch
series was already long enough, so I decided to not pursue this for now.
It's certainly something to iterate on in the future though.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-16 7:03 ` Patrick Steinhardt
@ 2026-01-16 17:47 ` Justin Tobler
2026-01-19 7:10 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Justin Tobler @ 2026-01-16 17:47 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On 26/01/16 08:03AM, Patrick Steinhardt wrote:
> On Thu, Jan 15, 2026 at 03:44:50PM -0600, Justin Tobler wrote:
> > On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > > diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> > > index 6964a5a52c..7d16fbc1b8 100644
> > > --- a/builtin/cat-file.c
> > > +++ b/builtin/cat-file.c
> > > @@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
> > > &payload, flags);
> > > }
> > > } else {
> > > - for_each_packed_object(the_repository, batch_one_object_packed,
> > > - &payload, flags);
> > > + struct object_info oi = { 0 };
> > > +
> > > + for (source = the_repository->objects->sources; source; source = source->next) {
> > > + int ret = packfile_store_for_each_object(source->packfiles, &oi,
> > > + batch_one_object_oi, &payload, flags);
> > > + if (ret)
> > > + break;
> > > + }
> >
> > Huh, I was a bit surprised to see that we are still handling object
> > iteration in a backend specific manner here. I would assume ideally we
> > would want to transparently iterate across objects wherever possible. I
> > assume the reason here has something to do with how iteration is handled
> > with bitmaps?
>
> Exactly. I was pondering a bit over whether or not I should invest a bit
> more time to also make this part here generic. But I felt like the patch
> series was already long enough, so I decided to not pursue this for now.
>
> It's certainly something to iterate on in the future though.
Certainly not worth rerolling by itself, but it might be nice to explain
this in the commit message and/or comment. :)
-Justin
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-16 17:47 ` Justin Tobler
@ 2026-01-19 7:10 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-19 7:10 UTC (permalink / raw)
To: Justin Tobler; +Cc: git
On Fri, Jan 16, 2026 at 11:47:45AM -0600, Justin Tobler wrote:
> On 26/01/16 08:03AM, Patrick Steinhardt wrote:
> > On Thu, Jan 15, 2026 at 03:44:50PM -0600, Justin Tobler wrote:
> > > On 26/01/15 12:04PM, Patrick Steinhardt wrote:
> > > > diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> > > > index 6964a5a52c..7d16fbc1b8 100644
> > > > --- a/builtin/cat-file.c
> > > > +++ b/builtin/cat-file.c
> > > > @@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
> > > > &payload, flags);
> > > > }
> > > > } else {
> > > > - for_each_packed_object(the_repository, batch_one_object_packed,
> > > > - &payload, flags);
> > > > + struct object_info oi = { 0 };
> > > > +
> > > > + for (source = the_repository->objects->sources; source; source = source->next) {
> > > > + int ret = packfile_store_for_each_object(source->packfiles, &oi,
> > > > + batch_one_object_oi, &payload, flags);
> > > > + if (ret)
> > > > + break;
> > > > + }
> > >
> > > Huh, I was a bit surprised to see that we are still handling object
> > > iteration in a backend specific manner here. I would assume ideally we
> > > would want to transparently iterate across objects wherever possible. I
> > > assume the reason here has something to do with how iteration is handled
> > > with bitmaps?
> >
> > Exactly. I was pondering a bit over whether or not I should invest a bit
> > more time to also make this part here generic. But I felt like the patch
> > series was already long enough, so I decided to not pursue this for now.
> >
> > It's certainly something to iterate on in the future though.
>
> Certainly not worth rerolling by itself, but it might be nice to explain
> this in the commit message and/or comment. :)
Fair, I've appended this locally. Thanks!
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH 11/14] odb: introduce mtime fields for object info requests
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (9 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
` (6 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
There are some use cases where we need to figure out the mtime for
objects. Most importantly, this is the case when we want to prune
unreachable objects. But getting at that data requires users to manually
derive the info either via the loose object's mtime, the packfiles'
mtime or via the ".mtimes" file.
Introduce a new `struct object_info::mtimep` pointer that allows callers
to request an object's mtime. This new field will be used in a
subsequent commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 29 +++++++++++++++++++++++++----
odb.c | 2 ++
odb.h | 1 +
packfile.c | 40 +++++++++++++++++++++++++++++++++-------
4 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/object-file.c b/object-file.c
index 65e730684b..c0f896673b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
+ struct stat st;
/*
* If we don't care about type or size, then we don't
@@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source,
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
struct stat st;
- if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
+ if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
goto out;
}
@@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- if (oi && oi->disk_sizep)
- *oi->disk_sizep = st.st_size;
+ if (oi) {
+ if (oi->disk_sizep)
+ *oi->disk_sizep = st.st_size;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
+ }
ret = 0;
goto out;
@@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- map = map_fd(fd, path, &mapsize);
+ if (fstat(fd, &st)) {
+ close(fd);
+ ret = -1;
+ goto out;
+ }
+
+ mapsize = xsize_t(st.st_size);
+ if (!mapsize) {
+ close(fd);
+ ret = error(_("object file %s is empty"), path);
+ goto out;
+ }
+
+ map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
if (!map) {
ret = -1;
goto out;
@@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source,
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
stream_to_end = &stream;
diff --git a/odb.c b/odb.c
index 65f0447aa5..67decd3908 100644
--- a/odb.c
+++ b/odb.c
@@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
+ if (oi->mtimep)
+ *oi->mtimep = 0;
oi->whence = OI_CACHED;
}
return 0;
diff --git a/odb.h b/odb.h
index 8f6d95aee5..9e22f79172 100644
--- a/odb.h
+++ b/odb.h
@@ -317,6 +317,7 @@ struct object_info {
off_t *disk_sizep;
struct object_id *delta_base_oid;
void **contentp;
+ time_t *mtimep;
/* Response */
enum {
diff --git a/packfile.c b/packfile.c
index 4f84bc19d9..c96ec21f86 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
-int packed_object_info(struct packed_git *p,
- off_t obj_offset, struct object_info *oi)
+static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset,
+ uint32_t *maybe_index_pos, struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type = OBJ_NONE;
+ uint32_t pack_pos;
int ret;
/*
@@ -1619,16 +1620,34 @@ int packed_object_info(struct packed_git *p,
}
}
- if (oi->disk_sizep) {
- uint32_t pos;
- if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
ret = -1;
goto out;
}
+ }
+
+ if (oi->disk_sizep)
+ *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+
+ if (oi->mtimep) {
+ if (p->is_cruft) {
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
+ die(_("could not load cruft pack .mtimes"));
+
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
+ else
+ index_pos = pack_pos_to_index(p, pack_pos);
- *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
+ *oi->mtimep = nth_packed_mtime(p, index_pos);
+ } else {
+ *oi->mtimep = p->mtime;
+ }
}
if (oi->typep) {
@@ -1681,6 +1700,12 @@ int packed_object_info(struct packed_git *p,
return ret;
}
+int packed_object_info(struct packed_git *p, off_t obj_offset,
+ struct object_info *oi)
+{
+ return packed_object_info_with_index_pos(p, obj_offset, NULL, oi);
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2377,7 +2402,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
if (data->oi) {
off_t offset = nth_packed_object_offset(pack, index_pos);
- if (packed_object_info(pack, offset, data->oi) < 0) {
+ if (packed_object_info_with_index_pos(pack, offset,
+ &index_pos, data->oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (10 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
` (5 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
When enumerating objects that are supposed to be stored in a new cruft
pack we use `for_each_packed_object()` and then derive each object's
mtime individually. Refactor this logic to instead use the new
`packfile_store_for_each_object()` function with an object info request
that asks for the respective mtimes.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/pack-objects.c | 45 +++++++++++++++++++++------------------------
1 file changed, 21 insertions(+), 24 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 74317051fd..223ec3b49e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit)
}
static int add_object_in_unpacked_pack(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
+ struct object_info *oi,
void *data UNUSED)
{
if (cruft) {
- off_t offset;
- time_t mtime;
-
- if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(pack, pos);
- } else {
- mtime = pack->mtime;
- }
- offset = nth_packed_object_offset(pack, pos);
-
- add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
- NULL, mtime);
+ add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
+ oi->u.packed.offset, NULL, *oi->mtimep);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
@@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(to_pack.repo,
- add_object_in_unpacked_pack,
- NULL,
- ODB_FOR_EACH_OBJECT_PACK_ORDER |
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
- ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
- die(_("cannot open pack index"));
+ struct odb_source *source;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ };
+
+ odb_prepare_alternates(to_pack.repo->objects);
+ for (source = to_pack.repo->objects->sources; source; source = source->next) {
+ if (!source->local)
+ continue;
+
+ if (packfile_store_for_each_object(source->packfiles, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
+ }
}
static int add_loose_object(const struct object_id *oid, const char *path,
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH 13/14] reachable: convert to use `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (11 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 11:04 ` [PATCH 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
` (4 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
To figure out which objects have expired we enumerate all loose and
packed objects individually so that we can figure out their respective
mtimes. Refactor the code to instead use `odb_for_each_object()` with a
request that asks for the object mtime.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reachable.c | 125 +++++++++++++++++-------------------------------------------
1 file changed, 35 insertions(+), 90 deletions(-)
diff --git a/reachable.c b/reachable.c
index 82676b2668..101cfc2727 100644
--- a/reachable.c
+++ b/reachable.c
@@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
return oidset_contains(&data->extra_recent_oids, oid);
}
-static void add_recent_object(const struct object_id *oid,
- struct packed_git *pack,
- off_t offset,
- timestamp_t mtime,
- struct recent_data *data)
+static int want_recent_object(struct recent_data *data,
+ const struct object_id *oid)
{
- struct object *obj;
- enum object_type type;
+ if (data->ignore_in_core_kept_packs &&
+ has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ return 0;
+ return 1;
+}
- if (!obj_is_recent(oid, mtime, data))
- return;
+static int add_recent_object(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data)
+{
+ struct recent_data *data = cb_data;
+ struct object *obj;
- /*
- * We do not want to call parse_object here, because
- * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for
- * later processing, and the revision machinery expects
- * commits and tags to have been parsed.
- */
- type = odb_read_object_info(the_repository->objects, oid, NULL);
- if (type < 0)
- die("unable to get object info for %s", oid_to_hex(oid));
+ if (!want_recent_object(data, oid) ||
+ !obj_is_recent(oid, *oi->mtimep, data))
+ return 0;
- switch (type) {
+ switch (*oi->typep) {
case OBJ_TAG:
case OBJ_COMMIT:
obj = parse_object_or_die(the_repository, oid, NULL);
@@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid,
break;
default:
die("unknown object type for %s: %s",
- oid_to_hex(oid), type_name(type));
+ oid_to_hex(oid), type_name(*oi->typep));
}
if (!obj)
die("unable to lookup %s", oid_to_hex(oid));
-
- add_pending_object(data->revs, obj, "");
- if (data->cb)
- data->cb(obj, pack, offset, mtime);
-}
-
-static int want_recent_object(struct recent_data *data,
- const struct object_id *oid)
-{
- if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ if (obj->flags & SEEN)
return 0;
- return 1;
-}
-static int add_recent_loose(const struct object_id *oid,
- const char *path, void *data)
-{
- struct stat st;
- struct object *obj;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
-
- if (stat(path, &st) < 0) {
- /*
- * It's OK if an object went away during our iteration; this
- * could be due to a simultaneous repack. But anything else
- * we should abort, since we might then fail to mark objects
- * which should not be pruned.
- */
- if (errno == ENOENT)
- return 0;
- return error_errno("unable to stat %s", oid_to_hex(oid));
+ add_pending_object(data->revs, obj, "");
+ if (data->cb) {
+ if (oi->whence == OI_PACKED)
+ data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep);
+ else
+ data->cb(obj, NULL, 0, *oi->mtimep);
}
- add_recent_object(oid, NULL, 0, st.st_mtime, data);
- return 0;
-}
-
-static int add_recent_packed(const struct object_id *oid,
- struct packed_git *p,
- uint32_t pos,
- void *data)
-{
- struct object *obj;
- timestamp_t mtime = p->mtime;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
- if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(p, pos);
- }
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
return 0;
}
@@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum odb_for_each_object_flags flags;
+ unsigned flags;
+ enum object_type type;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ .typep = &type,
+ };
int r;
data.revs = revs;
@@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
- r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
- if (r)
- goto done;
-
flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
+ r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags);
+ if (r)
+ goto done;
done:
oidset_clear(&data.extra_recent_oids);
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (12 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-15 11:04 ` Patrick Steinhardt
2026-01-15 13:50 ` [PATCH 00/14] odb: introduce `odb_for_each_object()` Junio C Hamano
` (3 subsequent siblings)
17 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-15 11:04 UTC (permalink / raw)
To: git
We have converted all callers of `for_each_loose_object()` and
`for_each_packed_object()` to use their new replacement functions
instead. We can thus remove them now.
Do so and inline `packfile_store_for_each_object_internal()` now that it
only has a single callsite again. This makes it a bit easier to follow
the callback indirection that is happening there.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 20 -------------
object-file.h | 11 -------
packfile.c | 92 +++++++++++++++++++----------------------------------------
packfile.h | 2 --
4 files changed, 29 insertions(+), 96 deletions(-)
diff --git a/object-file.c b/object-file.c
index c0f896673b..bc5209f2fe 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
-{
- struct odb_source *source;
-
- odb_prepare_alternates(odb);
- for (source = odb->sources; source; source = source->next) {
- int r = for_each_loose_file_in_source(source, cb, NULL,
- NULL, data);
- if (r)
- return r;
-
- if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
- break;
- }
-
- return 0;
-}
-
struct for_each_object_wrapper_data {
struct odb_source *source;
struct object_info *oi;
diff --git a/object-file.h b/object-file.h
index 048b778531..af7f57d2a1 100644
--- a/object-file.h
+++ b/object-file.h
@@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
each_loose_subdir_fn subdir_cb,
void *data);
-/*
- * Iterate over all accessible loose objects without respect to
- * reachability. By default, this includes both local and alternate objects.
- * The order in which objects are visited is unspecified.
- *
- * Any flags specific to packs are ignored.
- */
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn, void *,
- enum odb_for_each_object_flags flags);
-
/*
* Iterate through all loose objects in the given object database source and
* invoke the callback function for each of them. If given, the object info
diff --git a/packfile.c b/packfile.c
index c96ec21f86..493d81fdca 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2326,65 +2326,6 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-static int packfile_store_for_each_object_internal(struct packfile_store *store,
- each_packed_object_fn cb,
- void *data,
- unsigned flags,
- int *pack_errors)
-{
- struct packfile_list_entry *e;
- int ret = 0;
-
- store->skip_mru_updates = true;
-
- for (e = packfile_store_get_packs(store); e; e = e->next) {
- struct packed_git *p = e->pack;
-
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- *pack_errors = 1;
- continue;
- }
-
- ret = for_each_object_in_pack(p, cb, data, flags);
- if (ret)
- break;
- }
-
- store->skip_mru_updates = false;
-
- return ret;
-}
-
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
-{
- struct odb_source *source;
- int pack_errors = 0;
- int ret = 0;
-
- odb_prepare_alternates(repo->objects);
-
- for (source = repo->objects->sources; source; source = source->next) {
- ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
- flags, &pack_errors);
- if (ret)
- break;
- }
-
- return ret ? ret : pack_errors;
-}
-
struct packfile_store_for_each_object_wrapper_data {
struct packfile_store *store;
struct object_info *oi;
@@ -2424,12 +2365,37 @@ int packfile_store_for_each_object(struct packfile_store *store,
.cb = cb,
.cb_data = cb_data,
};
+ struct packfile_list_entry *e;
int pack_errors = 0, ret;
- ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
- &data, flags, &pack_errors);
- if (ret)
- return ret;
+ store->skip_mru_updates = true;
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
+
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ pack_errors = 1;
+ continue;
+ }
+
+ ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
+ &data, flags);
+ if (ret)
+ break;
+ }
+
+ store->skip_mru_updates = false;
return pack_errors ? -1 : 0;
}
diff --git a/packfile.h b/packfile.h
index ab0637fbe9..8e0d2b7661 100644
--- a/packfile.h
+++ b/packfile.h
@@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
unsigned flags);
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags);
/*
* Iterate through all packed objects in the given packfile store and invoke
--
2.52.0.660.gd05f3a8ea5.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* Re: [PATCH 00/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (13 preceding siblings ...)
2026-01-15 11:04 ` [PATCH 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
@ 2026-01-15 13:50 ` Junio C Hamano
2026-01-16 7:03 ` Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (2 subsequent siblings)
17 siblings, 1 reply; 120+ messages in thread
From: Junio C Hamano @ 2026-01-15 13:50 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
Patrick Steinhardt <ps@pks.im> writes:
> The patch series is built on top of 8745eae506 (The 17th batch,
> 2026-01-11) with the following two series merged into it:
>
> - ps/read-object-info-improvements at b7f649ca93 (Merge
> remote-tracking branch 'junio/ps/read-object-info-improvements' into
> HEAD, 2026-01-15).
>
> - ps/packfile-store-in-odb-source at 1ff0e42d33 (Merge remote-tracking
> branch 'junio/ps/packfile-store-in-odb-source' into HEAD,
> 2026-01-15).
These two commit objects you cite have never been at the tip of
these branches in my tree; I'll go by the branch name for now ;-)
^ permalink raw reply [flat|nested] 120+ messages in thread* Re: [PATCH 00/14] odb: introduce `odb_for_each_object()`
2026-01-15 13:50 ` [PATCH 00/14] odb: introduce `odb_for_each_object()` Junio C Hamano
@ 2026-01-16 7:03 ` Patrick Steinhardt
2026-01-16 16:49 ` Junio C Hamano
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-16 7:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Thu, Jan 15, 2026 at 05:50:11AM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> > The patch series is built on top of 8745eae506 (The 17th batch,
> > 2026-01-11) with the following two series merged into it:
> >
> > - ps/read-object-info-improvements at b7f649ca93 (Merge
> > remote-tracking branch 'junio/ps/read-object-info-improvements' into
> > HEAD, 2026-01-15).
> >
> > - ps/packfile-store-in-odb-source at 1ff0e42d33 (Merge remote-tracking
> > branch 'junio/ps/packfile-store-in-odb-source' into HEAD,
> > 2026-01-15).
>
> These two commit objects you cite have never been at the tip of
> these branches in my tree; I'll go by the branch name for now ;-)
Ugh, yeah. I referenced the merge commits in my tree, which is of course
dumb. Will fix the cover letter to point to what you have now.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH 00/14] odb: introduce `odb_for_each_object()`
2026-01-16 7:03 ` Patrick Steinhardt
@ 2026-01-16 16:49 ` Junio C Hamano
0 siblings, 0 replies; 120+ messages in thread
From: Junio C Hamano @ 2026-01-16 16:49 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
Patrick Steinhardt <ps@pks.im> writes:
> On Thu, Jan 15, 2026 at 05:50:11AM -0800, Junio C Hamano wrote:
>> Patrick Steinhardt <ps@pks.im> writes:
>>
>> > The patch series is built on top of 8745eae506 (The 17th batch,
>> > 2026-01-11) with the following two series merged into it:
>> >
>> > - ps/read-object-info-improvements at b7f649ca93 (Merge
>> > remote-tracking branch 'junio/ps/read-object-info-improvements' into
>> > HEAD, 2026-01-15).
>> >
>> > - ps/packfile-store-in-odb-source at 1ff0e42d33 (Merge remote-tracking
>> > branch 'junio/ps/packfile-store-in-odb-source' into HEAD,
>> > 2026-01-15).
>>
>> These two commit objects you cite have never been at the tip of
>> these branches in my tree; I'll go by the branch name for now ;-)
>
> Ugh, yeah. I referenced the merge commits in my tree, which is of course
> dumb. Will fix the cover letter to point to what you have now.
I am seeing good things in the series, without many nits to pick.
Maybe there is no need for another round, in which case there is no
need for fixed cover letter, either ;-).
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v2 00/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (14 preceding siblings ...)
2026-01-15 13:50 ` [PATCH 00/14] odb: introduce `odb_for_each_object()` Junio C Hamano
@ 2026-01-20 15:25 ` Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
` (13 more replies)
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
17 siblings, 14 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:25 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Hi,
this patch series introduces a generic `odb_for_each_object()` function
to iterate through objects and adapts callers to use it. The intent is
to make iteration through objects independent of the actual storage
backend.
The series is structured as follows:
- Commits 1 to 2 do some cleanups for the for-each-object flags.
- Commits 3 to 7 introduce the infrastructure for
`odb_for_each_object()`.
- Commits 8 to 13 convert a couple of callers to use the new
interfaces.
- Commit 14 drops now-unused functions.
The patch series is built on top of 8745eae506 (The 17th batch,
2026-01-11) with the following two series merged into it:
- ps/read-object-info-improvements at a282a8f163 (packfile: move MIDX
into packfile store, 2026-01-09).
- ps/packfile-store-in-odb-source at 12d3b58b55 (packfile: drop
repository parameter from `packed_object_info()`, 2026-01-12).
Changes in v2:
- Clarify the comment of `odb_for_each_object()` to point out that
it's the callback that can abort iteration by returning a non-zero
error code.
- Document in the commit message that we don't yet convert all sites
to use `odb_for_each_object()`.
- Link to v1: https://lore.kernel.org/r/20260115-pks-odb-for-each-object-v1-0-5418a91d5d99@pks.im
Thanks!
Patrick
---
Patrick Steinhardt (14):
odb: rename `FOR_EACH_OBJECT_*` flags
odb: fix flags parameter to be unsigned
object-file: extract function to read object info from path
object-file: introduce function to iterate through objects
packfile: extract function to iterate through objects of a store
packfile: introduce function to iterate through objects
odb: introduce `odb_for_each_object()`
builtin/fsck: refactor to use `odb_for_each_object()`
treewide: enumerate promisor objects via `odb_for_each_object()`
treewide: drop uses of `for_each_{loose,packed}_object()`
odb: introduce mtime fields for object info requests
builtin/pack-objects: use `packfile_store_for_each_object()`
reachable: convert to use `odb_for_each_object()`
odb: drop unused `for_each_{loose,packed}_object()` functions
builtin/cat-file.c | 30 +++++++--
builtin/fsck.c | 57 ++++------------
builtin/pack-objects.c | 47 +++++++-------
commit-graph.c | 46 +++++++++----
object-file.c | 120 ++++++++++++++++++++++------------
object-file.h | 21 +++---
odb.c | 29 +++++++++
odb.h | 43 ++++++++++--
packfile.c | 173 +++++++++++++++++++++++++++++++++----------------
packfile.h | 18 ++++-
reachable.c | 129 +++++++++++-------------------------
repack-promisor.c | 8 +--
revision.c | 10 ++-
13 files changed, 420 insertions(+), 311 deletions(-)
Range-diff versus v1:
1: 1202ac1d9d = 1: 7658b0e3d1 odb: rename `FOR_EACH_OBJECT_*` flags
2: 8fd78aad98 = 2: c082223854 odb: fix flags parameter to be unsigned
3: 40e049c68b = 3: 9d00d20178 object-file: extract function to read object info from path
4: 9eaebd1181 = 4: 213548b0ee object-file: introduce function to iterate through objects
5: d88e439de2 = 5: 1521d6285e packfile: extract function to iterate through objects of a store
6: 85f52c0db7 = 6: 7dcb9e5cb1 packfile: introduce function to iterate through objects
7: ed42cbcf6b ! 7: 9ab2a31068 odb: introduce `odb_for_each_object()`
@@ odb.h: typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ *
-+ * Returning a non-zero error code will cause iteration to abort. The error
-+ * code will be propagated.
++ * Returning a non-zero error code from the callback function will cause
++ * iteration to abort. The error code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
8: 39e10e18ed = 8: 343f2007bb builtin/fsck: refactor to use `odb_for_each_object()`
9: d3a87909f2 = 9: a524a2aae8 treewide: enumerate promisor objects via `odb_for_each_object()`
10: 06392d8a2e ! 10: f375828c1f treewide: drop uses of `for_each_{loose,packed}_object()`
@@ Commit message
Prepare for this by refactoring the sites accordingly.
+ Note that ideally, we'd convert all callsites to use the generic
+ `odb_for_each_object()` function already. But for some callers this is
+ not possible (yet), and it would require some significant refactorings
+ to make this work. Converting these sites will thus be deferred to a
+ later patch series.
+
Signed-off-by: Patrick Steinhardt <ps@pks.im>
## builtin/cat-file.c ##
11: 4a9e5687d0 = 11: b2b2025502 odb: introduce mtime fields for object info requests
12: 80284057a8 = 12: 8b596e7a8e builtin/pack-objects: use `packfile_store_for_each_object()`
13: 7c38197ee5 = 13: b8bb1cf980 reachable: convert to use `odb_for_each_object()`
14: 886002ba49 = 14: b53ac29d2c odb: drop unused `for_each_{loose,packed}_object()` functions
---
base-commit: 1ff0e42d332523a11cc3d61b8d8463db5f9f14e8
change-id: 20260115-pks-odb-for-each-object-60b78cde09fd
^ permalink raw reply [flat|nested] 120+ messages in thread* [PATCH v2 01/14] odb: rename `FOR_EACH_OBJECT_*` flags
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
@ 2026-01-20 15:25 ` Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
` (12 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:25 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This
prepares us for a new upcoming `odb_for_each_object()` function and
ensures that both the function and its flags have the same prefix.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 2 +-
builtin/pack-objects.c | 10 +++++-----
commit-graph.c | 4 ++--
object-file.c | 4 ++--
object-file.h | 2 +-
odb.h | 13 +++++++------
packfile.c | 20 ++++++++++----------
packfile.h | 4 ++--
reachable.c | 8 ++++----
repack-promisor.c | 2 +-
revision.c | 2 +-
11 files changed, 36 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 2ad712e9f8..6964a5a52c 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
batch_each_object(opt, batch_unordered_object,
- FOR_EACH_OBJECT_PACK_ORDER, &cb);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6ee31d48c9..74317051fd 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
strbuf_release(&buf);
@@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void)
if (for_each_packed_object(to_pack.repo,
add_object_in_unpacked_pack,
NULL,
- FOR_EACH_OBJECT_PACK_ORDER |
- FOR_EACH_OBJECT_LOCAL_ONLY |
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
die(_("cannot open pack index"));
}
diff --git a/commit-graph.c b/commit-graph.c
index 6b1f02e179..7f1145a082 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
goto cleanup;
}
for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
for_each_packed_object(ctx->r, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
diff --git a/object-file.c b/object-file.c
index e7e4c3348f..64e9e239dc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
struct odb_source *source;
@@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb,
if (r)
return r;
- if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
break;
}
diff --git a/object-file.h b/object-file.h
index 1229d5f675..42bb50e10c 100644
--- a/object-file.h
+++ b/object-file.h
@@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
*/
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
/**
diff --git a/odb.h b/odb.h
index bab07755f4..74503addf1 100644
--- a/odb.h
+++ b/odb.h
@@ -442,24 +442,25 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for for_each_*_object(). */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
enum {
diff --git a/packfile.c b/packfile.c
index 402c3b5dc7..b65f0b43f1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
uint32_t i;
int r = 0;
- if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) {
if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p,
* - in pack-order, it is pack position, which we must
* convert to an index position in order to get the oid.
*/
- if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER)
index_pos = pack_pos_to_index(p, i);
else
index_pos = i;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags)
+ void *data, enum odb_for_each_object_flags flags)
{
struct odb_source *source;
int r = 0;
@@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
struct packed_git *p = e->pack;
- if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
@@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (repo_has_promisor_remote(r)) {
for_each_packed_object(r, add_promisor_object,
&promisor_objects,
- FOR_EACH_OBJECT_PROMISOR_ONLY |
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/packfile.h b/packfile.h
index acc5c55ad5..15551258bd 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags);
+ void *data, enum odb_for_each_object_flags flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
diff --git a/reachable.c b/reachable.c
index 4b532039d5..82676b2668 100644
--- a/reachable.c
+++ b/reachable.c
@@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum for_each_object_flags flags;
+ enum odb_for_each_object_flags flags;
int r;
data.revs = revs;
@@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
data.extra_recent_oids_loaded = 0;
r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- FOR_EACH_OBJECT_LOCAL_ONLY);
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
if (r)
goto done;
- flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
+ flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
- flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
+ flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
diff --git a/repack-promisor.c b/repack-promisor.c
index ee6e0669f6..45c330b9a5 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo,
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
for_each_packed_object(repo, write_oid, &ctx,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index b65a763770..5aadf46dac 100644
--- a/revision.c
+++ b/revision.c
@@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs)
if (revs->exclude_promisor_objects) {
for_each_packed_object(revs->repo, mark_uninteresting, revs,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
}
if (!revs->reflog_info)
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 02/14] odb: fix flags parameter to be unsigned
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
@ 2026-01-20 15:25 ` Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 03/14] object-file: extract function to read object info from path Patrick Steinhardt
` (11 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:25 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
The `flags` parameter accepted by various `for_each_object()` functions
is a bitfield of multiple flags. Such parameters are typically unsigned
in the Git codebase, but we use `enum odb_for_each_object_flags` in
some places.
Adapt these function signatures to use the correct type.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 3 ++-
object-file.h | 3 ++-
packfile.c | 4 ++--
packfile.h | 4 ++--
4 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/object-file.c b/object-file.c
index 64e9e239dc..8fa461dd59 100644
--- a/object-file.c
+++ b/object-file.c
@@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags)
+ struct object_info *oi,
+ unsigned flags)
{
int ret;
int fd;
diff --git a/object-file.h b/object-file.h
index 42bb50e10c..2acf19fb91 100644
--- a/object-file.h
+++ b/object-file.h
@@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags);
+ struct object_info *oi,
+ unsigned flags);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
diff --git a/packfile.c b/packfile.c
index b65f0b43f1..79fe64a25b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
+ unsigned flags)
{
uint32_t i;
int r = 0;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags)
+ void *data, unsigned flags)
{
struct odb_source *source;
int r = 0;
diff --git a/packfile.h b/packfile.h
index 15551258bd..447c44c4a7 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum odb_for_each_object_flags flags);
+ unsigned flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags);
+ void *data, unsigned flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 03/14] object-file: extract function to read object info from path
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
2026-01-20 15:25 ` [PATCH v2 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
@ 2026-01-20 15:25 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
` (10 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:25 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Extract a new function that allows us to read object info for a specific
loose object via a user-supplied path. This function will be used in a
subsequent commit.
Note that this also allows us to drop `stat_loose_object()`, which is
a simple wrapper around `odb_loose_path()` plus lstat(3p).
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 39 ++++++++++++++++-----------------------
1 file changed, 16 insertions(+), 23 deletions(-)
diff --git a/object-file.c b/object-file.c
index 8fa461dd59..a651129426 100644
--- a/object-file.c
+++ b/object-file.c
@@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
}
/*
- * Find "oid" as a loose object in given source.
- * Returns 0 on success, negative on failure.
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to stat_loose_object().
*/
-static int stat_loose_object(struct odb_source_loose *loose,
- const struct object_id *oid,
- struct stat *st, const char **path)
-{
- static struct strbuf buf = STRBUF_INIT;
-
- *path = odb_loose_path(loose->source, &buf, oid);
- if (!lstat(*path, st))
- return 0;
-
- return -1;
-}
-
-/*
- * Like stat_loose_object(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
@@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
-int odb_source_loose_read_object_info(struct odb_source *source,
+static int read_object_info_from_path(struct odb_source *source,
+ const char *path,
const struct object_id *oid,
struct object_info *oi,
unsigned flags)
@@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source,
int ret;
int fd;
unsigned long mapsize;
- const char *path;
void *map = NULL;
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
@@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
+ if (lstat(path, &st) < 0) {
ret = -1;
goto out;
}
@@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- fd = open_loose_object(source->loose, oid, &path);
+ fd = git_open(path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
@@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
return ret;
}
+int odb_source_loose_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ odb_loose_path(source, &buf, oid);
+ return read_object_info_from_path(source, buf.buf, oid, oi, flags);
+}
+
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 04/14] object-file: introduce function to iterate through objects
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (2 preceding siblings ...)
2026-01-20 15:25 ` [PATCH v2 03/14] object-file: extract function to read object info from path Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
` (9 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have multiple divergent interfaces to iterate through objects of a
specific backend:
- `for_each_loose_object()` yields all loose objects.
- `for_each_packed_object()` (somewhat obviously) yields all packed
objects.
These functions have different function signatures, which makes it hard
to create a common abstraction layer that covers both of these.
Introduce a new function `odb_source_loose_for_each_object()` to plug
this gap. This function doesn't take any data specific to loose objects,
but instead it accepts a `struct object_info` that will be populated
exactly as if `odb_source_loose_read_object_info()` had been called.
The benefit of this new interface is that we can continue to pass
backend-specific data, as `struct object_info` contains a union for
these exact use cases. This will allow us to unify how we iterate
through objects across both loose and packed objects in a subsequent
commit.
The `for_each_loose_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++
object-file.h | 11 +++++++++++
odb.h | 12 ++++++++++++
3 files changed, 64 insertions(+)
diff --git a/object-file.c b/object-file.c
index a651129426..65e730684b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1801,6 +1801,47 @@ int for_each_loose_object(struct object_database *odb,
return 0;
}
+struct for_each_object_wrapper_data {
+ struct odb_source *source;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+ const char *path,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
+ if (data->oi &&
+ read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
+ return -1;
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+
+ /* There are no loose promisor objects, so we can return immediately. */
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+ return 0;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+ return 0;
+
+ return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+ NULL, NULL, &data);
+}
+
static int append_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
diff --git a/object-file.h b/object-file.h
index 2acf19fb91..048b778531 100644
--- a/object-file.h
+++ b/object-file.h
@@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
enum odb_for_each_object_flags flags);
+/*
+ * Iterate through all loose objects in the given object database source and
+ * invoke the callback function for each of them. If given, the object info
+ * will be populated with the object's data as if you had called
+ * `odb_source_loose_read_object_info()` on the object.
+ */
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/**
* format_object_header() is a thin wrapper around s xsnprintf() that
diff --git a/odb.h b/odb.h
index 74503addf1..f97f249580 100644
--- a/odb.h
+++ b/odb.h
@@ -463,6 +463,18 @@ enum odb_for_each_object_flags {
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated the same as if you would call
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 05/14] packfile: extract function to iterate through objects of a store
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (3 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
` (8 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
In the next commit we're about to introduce a new function that knows to
iterate through objects of a given packfile store. Same as with the
equivalent function for loose objects, this new function will also be
agnostic of backends by using a `struct object_info`.
Prepare for this by extracting a new shared function to iterate through
a single packfile store.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 78 ++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 45 insertions(+), 33 deletions(-)
diff --git a/packfile.c b/packfile.c
index 79fe64a25b..d15a2ce12b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
+static int packfile_store_for_each_object_internal(struct packfile_store *store,
+ each_packed_object_fn cb,
+ void *data,
+ unsigned flags,
+ int *pack_errors)
{
- struct odb_source *source;
- int r = 0;
- int pack_errors = 0;
+ struct packfile_list_entry *e;
+ int ret = 0;
- odb_prepare_alternates(repo->objects);
+ store->skip_mru_updates = true;
- for (source = repo->objects->sources; source; source = source->next) {
- struct packfile_list_entry *e;
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
- source->packfiles->skip_mru_updates = true;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ *pack_errors = 1;
+ continue;
+ }
- for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
- struct packed_git *p = e->pack;
+ ret = for_each_object_in_pack(p, cb, data, flags);
+ if (ret)
+ break;
+ }
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- pack_errors = 1;
- continue;
- }
+ store->skip_mru_updates = false;
- r = for_each_object_in_pack(p, cb, data, flags);
- if (r)
- break;
- }
+ return ret;
+}
- source->packfiles->skip_mru_updates = false;
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, unsigned flags)
+{
+ struct odb_source *source;
+ int pack_errors = 0;
+ int ret = 0;
- if (r)
+ odb_prepare_alternates(repo->objects);
+
+ for (source = repo->objects->sources; source; source = source->next) {
+ ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
+ flags, &pack_errors);
+ if (ret)
break;
}
- return r ? r : pack_errors;
+ return ret ? ret : pack_errors;
}
static int add_promisor_object(const struct object_id *oid,
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 06/14] packfile: introduce function to iterate through objects
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (4 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (7 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Introduce a new function `packfile_store_for_each_object()`. This
function is the equivalent to `odb_source_loose_for_each_object()` in
that it:
- Works on a single packfile store and thus per object source.
- Passes a `struct object_info` to the callback function.
As such, it provides the same callback interface as we already provide
for loose objects now. These functions will be used in a subsequent step
to implement `odb_for_each_object()`.
The `for_each_packed_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
packfile.h | 14 ++++++++++++++
2 files changed, 62 insertions(+)
diff --git a/packfile.c b/packfile.c
index d15a2ce12b..cd45c6f21c 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2360,6 +2360,54 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
return ret ? ret : pack_errors;
}
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t index_pos,
+ void *cb_data)
+{
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+
+ if (data->oi) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
+
+ if (packed_object_info(pack, offset, data->oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
+ }
+
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+ int pack_errors = 0, ret;
+
+ ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
+ &data, flags, &pack_errors);
+ if (ret)
+ return ret;
+
+ return pack_errors ? -1 : 0;
+}
+
static int add_promisor_object(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos UNUSED,
diff --git a/packfile.h b/packfile.h
index 447c44c4a7..ab0637fbe9 100644
--- a/packfile.h
+++ b/packfile.h
@@ -343,6 +343,20 @@ int for_each_object_in_pack(struct packed_git *p,
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
void *data, unsigned flags);
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
+ * the callback function for each of them. If given, the object info will be
+ * populated with the object's data as if you had called
+ * `packfile_store_read_object_info()` on the object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
#define PACKDIR_FILE_IDX 2
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 07/14] odb: introduce `odb_for_each_object()`
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (5 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
` (6 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Introduce a new function `odb_for_each_object()` that knows to iterate
through all objects that are part of a given object database. This function is
essentially a simple wrapper around the object database sources.
Subsequent commits will adapt callers to use this new function.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
odb.c | 27 +++++++++++++++++++++++++++
odb.h | 17 +++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/odb.c b/odb.c
index ac70b6a099..65f0447aa5 100644
--- a/odb.c
+++ b/odb.c
@@ -995,6 +995,33 @@ int odb_freshen_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ int ret;
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ continue;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
diff --git a/odb.h b/odb.h
index f97f249580..8a37fe08e0 100644
--- a/odb.h
+++ b/odb.h
@@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
struct object_info *oi,
void *cb_data);
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ *
+ * Returning a non-zero error code from the callback function will cause
+ * iteration to abort. The error code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (6 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
` (5 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
In git-fsck(1) we have two callsites where we iterate over all objects
via `for_each_loose_object()` and `for_each_packed_object()`. Both of
these are trivially convertible to `odb_for_each_object()`.
Refactor these callsites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/fsck.c | 57 ++++++++++++---------------------------------------------
1 file changed, 12 insertions(+), 45 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4979bc795e..96107695ae 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
return 0;
}
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
+ struct object_info *io UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
- return; /* not part of our original set */
+ return 0; /* not part of our original set */
if (obj->flags & REACHABLE)
- return; /* reachable objects already traversed */
+ return 0; /* reachable objects already traversed */
/*
* Avoid passing OBJ_NONE to fsck_walk, which will parse the object
@@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
-}
-static int mark_loose_unreachable_referents(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
- return 0;
-}
-
-static int mark_packed_unreachable_referents(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
return 0;
}
@@ -394,12 +381,8 @@ static void check_connectivity(void)
* and ignore any that weren't present in our earlier
* traversal.
*/
- for_each_loose_object(the_repository->objects,
- mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_unreachable_referents,
- NULL,
- 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_unreachable_referents, NULL, 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
fsck_resolve_undo(istate, index_path);
}
-static void mark_object_for_connectivity(const struct object_id *oid)
+static int mark_object_for_connectivity(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data UNUSED)
{
struct object *obj = lookup_unknown_object(the_repository, oid);
obj->flags |= HAS_OBJ;
-}
-
-static int mark_loose_for_connectivity(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
- return 0;
-}
-
-static int mark_packed_for_connectivity(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
return 0;
}
@@ -1001,10 +970,8 @@ int cmd_fsck(int argc,
fsck_refs(the_repository);
if (connectivity_only) {
- for_each_loose_object(the_repository->objects,
- mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_for_connectivity, NULL, 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_object_for_connectivity, NULL, 0);
} else {
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next)
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 09/14] treewide: enumerate promisor objects via `odb_for_each_object()`
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (7 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
` (4 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have multiple callsites where we enumerate all promisor objects in
the object database via `for_each_packed_object()`. This is done by
passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to
skip over all non-promisor objects.
These callsites can be trivially converted to `odb_for_each_object()` as
we know to skip enumeration of loose objects in case the `PROMISOR_ONLY`
flag was passed by the caller.
Refactor the sites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 37 ++++++++++++++++++++++---------------
repack-promisor.c | 8 ++++----
revision.c | 10 ++++------
3 files changed, 30 insertions(+), 25 deletions(-)
diff --git a/packfile.c b/packfile.c
index cd45c6f21c..4f84bc19d9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2408,28 +2408,32 @@ int packfile_store_for_each_object(struct packfile_store *store,
return pack_errors ? -1 : 0;
}
+struct add_promisor_object_data {
+ struct repository *repo;
+ struct oidset *set;
+};
+
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos UNUSED,
- void *set_)
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
- struct oidset *set = set_;
+ struct add_promisor_object_data *data = cb_data;
struct object *obj;
int we_parsed_object;
- obj = lookup_object(pack->repo, oid);
+ obj = lookup_object(data->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object_with_flags(pack->repo, oid,
+ obj = parse_object_with_flags(data->repo, oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
}
if (!obj)
return 1;
- oidset_insert(set, oid);
+ oidset_insert(data->set, oid);
/*
* If this is a tree, commit, or tag, the objects it refers
@@ -2447,19 +2451,19 @@ static int add_promisor_object(const struct object_id *oid,
*/
return 0;
while (tree_entry_gently(&desc, &entry))
- oidset_insert(set, &entry.oid);
+ oidset_insert(data->set, &entry.oid);
if (we_parsed_object)
free_tree_buffer(tree);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, get_commit_tree_oid(commit));
+ oidset_insert(data->set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
- oidset_insert(set, &parents->item->object.oid);
+ oidset_insert(data->set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
- oidset_insert(set, get_tagged_oid(tag));
+ oidset_insert(data->set, get_tagged_oid(tag));
}
return 0;
}
@@ -2471,10 +2475,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(r)) {
- for_each_packed_object(r, add_promisor_object,
- &promisor_objects,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+ struct add_promisor_object_data data = {
+ .repo = r,
+ .set = &promisor_objects,
+ };
+
+ odb_for_each_object(r->objects, NULL, add_promisor_object, &data,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/repack-promisor.c b/repack-promisor.c
index 45c330b9a5..35c4073632 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -17,8 +17,8 @@ struct write_oid_context {
* necessary.
*/
static int write_oid(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED, void *data)
+ struct object_info *oi UNUSED,
+ void *data)
{
struct write_oid_context *ctx = data;
struct child_process *cmd = ctx->cmd;
@@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo,
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
- for_each_packed_object(repo, write_oid, &ctx,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
+ odb_for_each_object(repo->objects, NULL, write_oid, &ctx,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index 5aadf46dac..e34bcd8e88 100644
--- a/revision.c
+++ b/revision.c
@@ -3626,8 +3626,7 @@ void reset_revision_walk(void)
}
static int mark_uninteresting(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
+ struct object_info *oi UNUSED,
void *cb)
{
struct rev_info *revs = cb;
@@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs)
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
- if (revs->exclude_promisor_objects) {
- for_each_packed_object(revs->repo, mark_uninteresting, revs,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
- }
+ if (revs->exclude_promisor_objects)
+ odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting,
+ revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (8 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
` (3 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We're using `for_each_loose_object()` and `for_each_packed_object()` at
a couple of callsites to enumerate all loose and packed objects,
respectively. These functions will be removed in a subsequent commit in
favor of the newly introduced `odb_source_loose_for_each_object()` and
`packfile_store_for_each_object()` replacements.
Prepare for this by refactoring the sites accordingly.
Note that ideally, we'd convert all callsites to use the generic
`odb_for_each_object()` function already. But for some callers this is
not possible (yet), and it would require some significant refactorings
to make this work. Converting these sites will thus be deferred to a
later patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 28 ++++++++++++++++++++++------
commit-graph.c | 44 +++++++++++++++++++++++++++++++-------------
2 files changed, 53 insertions(+), 19 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 6964a5a52c..7d16fbc1b8 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -806,11 +806,14 @@ struct for_each_object_payload {
void *payload;
};
-static int batch_one_object_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *_payload)
+static int batch_one_object_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *_payload)
{
struct for_each_object_payload *payload = _payload;
+ if (oi && oi->whence == OI_PACKED)
+ return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
+ payload->payload);
return payload->callback(oid, NULL, 0, payload->payload);
}
@@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt,
.payload = _payload,
};
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
+ &payload, flags);
+ if (ret)
+ break;
+ }
if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
batch_one_object_bitmapped, &payload)) {
@@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
&payload, flags);
}
} else {
- for_each_packed_object(the_repository, batch_one_object_packed,
- &payload, flags);
+ struct object_info oi = { 0 };
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = packfile_store_for_each_object(source->packfiles, &oi,
+ batch_one_object_oi, &payload, flags);
+ if (ret)
+ break;
+ }
}
free_bitmap_index(bitmap);
diff --git a/commit-graph.c b/commit-graph.c
index 7f1145a082..a3087d7883 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
+static int add_packed_commits_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *data)
+{
+ struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
+
+ if (ctx->progress)
+ display_progress(ctx->progress, ++ctx->progress_done);
+
+ if (*oi->typep != OBJ_COMMIT)
+ return 0;
+
+ oid_array_append(&ctx->oids, oid);
+ set_commit_pos(ctx->r, oid);
+
+ return 0;
+}
+
static int add_packed_commits(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
- struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
enum object_type type;
off_t offset = nth_packed_object_offset(pack, pos);
struct object_info oi = OBJECT_INFO_INIT;
- if (ctx->progress)
- display_progress(ctx->progress, ++ctx->progress_done);
-
oi.typep = &type;
if (packed_object_info(pack, offset, &oi) < 0)
die(_("unable to get type of object %s"), oid_to_hex(oid));
- if (type != OBJ_COMMIT)
- return 0;
-
- oid_array_append(&ctx->oids, oid);
- set_commit_pos(ctx->r, oid);
-
- return 0;
+ return add_packed_commits_oi(oid, &oi, data);
}
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
@@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
+ struct odb_source *source;
+ enum object_type type;
+ struct object_info oi = {
+ .typep = &type,
+ };
+
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
ctx->r,
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(ctx->r, add_packed_commits, ctx,
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
+ odb_prepare_alternates(ctx->r->objects);
+ for (source = ctx->r->objects->sources; source; source = source->next)
+ packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi,
+ ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 11/14] odb: introduce mtime fields for object info requests
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (9 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
` (2 subsequent siblings)
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
There are some use cases where we need to figure out the mtime for
objects. Most importantly, this is the case when we want to prune
unreachable objects. But getting at that data requires users to manually
derive the info either via the loose object's mtime, the packfiles'
mtime or via the ".mtimes" file.
Introduce a new `struct object_info::mtimep` pointer that allows callers
to request an object's mtime. This new field will be used in a
subsequent commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 29 +++++++++++++++++++++++++----
odb.c | 2 ++
odb.h | 1 +
packfile.c | 40 +++++++++++++++++++++++++++++++++-------
4 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/object-file.c b/object-file.c
index 65e730684b..c0f896673b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
+ struct stat st;
/*
* If we don't care about type or size, then we don't
@@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source,
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
struct stat st;
- if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
+ if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
goto out;
}
@@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- if (oi && oi->disk_sizep)
- *oi->disk_sizep = st.st_size;
+ if (oi) {
+ if (oi->disk_sizep)
+ *oi->disk_sizep = st.st_size;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
+ }
ret = 0;
goto out;
@@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- map = map_fd(fd, path, &mapsize);
+ if (fstat(fd, &st)) {
+ close(fd);
+ ret = -1;
+ goto out;
+ }
+
+ mapsize = xsize_t(st.st_size);
+ if (!mapsize) {
+ close(fd);
+ ret = error(_("object file %s is empty"), path);
+ goto out;
+ }
+
+ map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
if (!map) {
ret = -1;
goto out;
@@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source,
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
stream_to_end = &stream;
diff --git a/odb.c b/odb.c
index 65f0447aa5..67decd3908 100644
--- a/odb.c
+++ b/odb.c
@@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
+ if (oi->mtimep)
+ *oi->mtimep = 0;
oi->whence = OI_CACHED;
}
return 0;
diff --git a/odb.h b/odb.h
index 8a37fe08e0..68336d2730 100644
--- a/odb.h
+++ b/odb.h
@@ -317,6 +317,7 @@ struct object_info {
off_t *disk_sizep;
struct object_id *delta_base_oid;
void **contentp;
+ time_t *mtimep;
/* Response */
enum {
diff --git a/packfile.c b/packfile.c
index 4f84bc19d9..c96ec21f86 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
-int packed_object_info(struct packed_git *p,
- off_t obj_offset, struct object_info *oi)
+static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset,
+ uint32_t *maybe_index_pos, struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type = OBJ_NONE;
+ uint32_t pack_pos;
int ret;
/*
@@ -1619,16 +1620,34 @@ int packed_object_info(struct packed_git *p,
}
}
- if (oi->disk_sizep) {
- uint32_t pos;
- if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
ret = -1;
goto out;
}
+ }
+
+ if (oi->disk_sizep)
+ *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+
+ if (oi->mtimep) {
+ if (p->is_cruft) {
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
+ die(_("could not load cruft pack .mtimes"));
+
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
+ else
+ index_pos = pack_pos_to_index(p, pack_pos);
- *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
+ *oi->mtimep = nth_packed_mtime(p, index_pos);
+ } else {
+ *oi->mtimep = p->mtime;
+ }
}
if (oi->typep) {
@@ -1681,6 +1700,12 @@ int packed_object_info(struct packed_git *p,
return ret;
}
+int packed_object_info(struct packed_git *p, off_t obj_offset,
+ struct object_info *oi)
+{
+ return packed_object_info_with_index_pos(p, obj_offset, NULL, oi);
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2377,7 +2402,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
if (data->oi) {
off_t offset = nth_packed_object_offset(pack, index_pos);
- if (packed_object_info(pack, offset, data->oi) < 0) {
+ if (packed_object_info_with_index_pos(pack, offset,
+ &index_pos, data->oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (10 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
When enumerating objects that are supposed to be stored in a new cruft
pack we use `for_each_packed_object()` and then derive each object's
mtime individually. Refactor this logic to instead use the new
`packfile_store_for_each_object()` function with an object info request
that asks for the respective mtimes.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/pack-objects.c | 45 +++++++++++++++++++++------------------------
1 file changed, 21 insertions(+), 24 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 74317051fd..223ec3b49e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit)
}
static int add_object_in_unpacked_pack(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
+ struct object_info *oi,
void *data UNUSED)
{
if (cruft) {
- off_t offset;
- time_t mtime;
-
- if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(pack, pos);
- } else {
- mtime = pack->mtime;
- }
- offset = nth_packed_object_offset(pack, pos);
-
- add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
- NULL, mtime);
+ add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
+ oi->u.packed.offset, NULL, *oi->mtimep);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
@@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(to_pack.repo,
- add_object_in_unpacked_pack,
- NULL,
- ODB_FOR_EACH_OBJECT_PACK_ORDER |
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
- ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
- die(_("cannot open pack index"));
+ struct odb_source *source;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ };
+
+ odb_prepare_alternates(to_pack.repo->objects);
+ for (source = to_pack.repo->objects->sources; source; source = source->next) {
+ if (!source->local)
+ continue;
+
+ if (packfile_store_for_each_object(source->packfiles, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
+ }
}
static int add_loose_object(const struct object_id *oid, const char *path,
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 13/14] reachable: convert to use `odb_for_each_object()`
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (11 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
2026-01-20 15:26 ` [PATCH v2 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
To figure out which objects have expired we enumerate all loose and
packed objects individually so that we can figure out their respective
mtimes. Refactor the code to instead use `odb_for_each_object()` with a
request that asks for the object mtime.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reachable.c | 125 +++++++++++++++++-------------------------------------------
1 file changed, 35 insertions(+), 90 deletions(-)
diff --git a/reachable.c b/reachable.c
index 82676b2668..101cfc2727 100644
--- a/reachable.c
+++ b/reachable.c
@@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
return oidset_contains(&data->extra_recent_oids, oid);
}
-static void add_recent_object(const struct object_id *oid,
- struct packed_git *pack,
- off_t offset,
- timestamp_t mtime,
- struct recent_data *data)
+static int want_recent_object(struct recent_data *data,
+ const struct object_id *oid)
{
- struct object *obj;
- enum object_type type;
+ if (data->ignore_in_core_kept_packs &&
+ has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ return 0;
+ return 1;
+}
- if (!obj_is_recent(oid, mtime, data))
- return;
+static int add_recent_object(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data)
+{
+ struct recent_data *data = cb_data;
+ struct object *obj;
- /*
- * We do not want to call parse_object here, because
- * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for
- * later processing, and the revision machinery expects
- * commits and tags to have been parsed.
- */
- type = odb_read_object_info(the_repository->objects, oid, NULL);
- if (type < 0)
- die("unable to get object info for %s", oid_to_hex(oid));
+ if (!want_recent_object(data, oid) ||
+ !obj_is_recent(oid, *oi->mtimep, data))
+ return 0;
- switch (type) {
+ switch (*oi->typep) {
case OBJ_TAG:
case OBJ_COMMIT:
obj = parse_object_or_die(the_repository, oid, NULL);
@@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid,
break;
default:
die("unknown object type for %s: %s",
- oid_to_hex(oid), type_name(type));
+ oid_to_hex(oid), type_name(*oi->typep));
}
if (!obj)
die("unable to lookup %s", oid_to_hex(oid));
-
- add_pending_object(data->revs, obj, "");
- if (data->cb)
- data->cb(obj, pack, offset, mtime);
-}
-
-static int want_recent_object(struct recent_data *data,
- const struct object_id *oid)
-{
- if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ if (obj->flags & SEEN)
return 0;
- return 1;
-}
-static int add_recent_loose(const struct object_id *oid,
- const char *path, void *data)
-{
- struct stat st;
- struct object *obj;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
-
- if (stat(path, &st) < 0) {
- /*
- * It's OK if an object went away during our iteration; this
- * could be due to a simultaneous repack. But anything else
- * we should abort, since we might then fail to mark objects
- * which should not be pruned.
- */
- if (errno == ENOENT)
- return 0;
- return error_errno("unable to stat %s", oid_to_hex(oid));
+ add_pending_object(data->revs, obj, "");
+ if (data->cb) {
+ if (oi->whence == OI_PACKED)
+ data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep);
+ else
+ data->cb(obj, NULL, 0, *oi->mtimep);
}
- add_recent_object(oid, NULL, 0, st.st_mtime, data);
- return 0;
-}
-
-static int add_recent_packed(const struct object_id *oid,
- struct packed_git *p,
- uint32_t pos,
- void *data)
-{
- struct object *obj;
- timestamp_t mtime = p->mtime;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
- if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(p, pos);
- }
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
return 0;
}
@@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum odb_for_each_object_flags flags;
+ unsigned flags;
+ enum object_type type;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ .typep = &type,
+ };
int r;
data.revs = revs;
@@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
- r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
- if (r)
- goto done;
-
flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
+ r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags);
+ if (r)
+ goto done;
done:
oidset_clear(&data.extra_recent_oids);
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v2 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
` (12 preceding siblings ...)
2026-01-20 15:26 ` [PATCH v2 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-20 15:26 ` Patrick Steinhardt
13 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-20 15:26 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have converted all callers of `for_each_loose_object()` and
`for_each_packed_object()` to use their new replacement functions
instead. We can thus remove them now.
Do so and inline `packfile_store_for_each_object_internal()` now that it
only has a single callsite again. This makes it a bit easier to follow
the callback indirection that is happening there.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 20 -------------
object-file.h | 11 -------
packfile.c | 92 +++++++++++++++++++----------------------------------------
packfile.h | 2 --
4 files changed, 29 insertions(+), 96 deletions(-)
diff --git a/object-file.c b/object-file.c
index c0f896673b..bc5209f2fe 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
-{
- struct odb_source *source;
-
- odb_prepare_alternates(odb);
- for (source = odb->sources; source; source = source->next) {
- int r = for_each_loose_file_in_source(source, cb, NULL,
- NULL, data);
- if (r)
- return r;
-
- if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
- break;
- }
-
- return 0;
-}
-
struct for_each_object_wrapper_data {
struct odb_source *source;
struct object_info *oi;
diff --git a/object-file.h b/object-file.h
index 048b778531..af7f57d2a1 100644
--- a/object-file.h
+++ b/object-file.h
@@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
each_loose_subdir_fn subdir_cb,
void *data);
-/*
- * Iterate over all accessible loose objects without respect to
- * reachability. By default, this includes both local and alternate objects.
- * The order in which objects are visited is unspecified.
- *
- * Any flags specific to packs are ignored.
- */
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn, void *,
- enum odb_for_each_object_flags flags);
-
/*
* Iterate through all loose objects in the given object database source and
* invoke the callback function for each of them. If given, the object info
diff --git a/packfile.c b/packfile.c
index c96ec21f86..493d81fdca 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2326,65 +2326,6 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-static int packfile_store_for_each_object_internal(struct packfile_store *store,
- each_packed_object_fn cb,
- void *data,
- unsigned flags,
- int *pack_errors)
-{
- struct packfile_list_entry *e;
- int ret = 0;
-
- store->skip_mru_updates = true;
-
- for (e = packfile_store_get_packs(store); e; e = e->next) {
- struct packed_git *p = e->pack;
-
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- *pack_errors = 1;
- continue;
- }
-
- ret = for_each_object_in_pack(p, cb, data, flags);
- if (ret)
- break;
- }
-
- store->skip_mru_updates = false;
-
- return ret;
-}
-
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
-{
- struct odb_source *source;
- int pack_errors = 0;
- int ret = 0;
-
- odb_prepare_alternates(repo->objects);
-
- for (source = repo->objects->sources; source; source = source->next) {
- ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
- flags, &pack_errors);
- if (ret)
- break;
- }
-
- return ret ? ret : pack_errors;
-}
-
struct packfile_store_for_each_object_wrapper_data {
struct packfile_store *store;
struct object_info *oi;
@@ -2424,12 +2365,37 @@ int packfile_store_for_each_object(struct packfile_store *store,
.cb = cb,
.cb_data = cb_data,
};
+ struct packfile_list_entry *e;
int pack_errors = 0, ret;
- ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
- &data, flags, &pack_errors);
- if (ret)
- return ret;
+ store->skip_mru_updates = true;
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
+
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ pack_errors = 1;
+ continue;
+ }
+
+ ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
+ &data, flags);
+ if (ret)
+ break;
+ }
+
+ store->skip_mru_updates = false;
return pack_errors ? -1 : 0;
}
diff --git a/packfile.h b/packfile.h
index ab0637fbe9..8e0d2b7661 100644
--- a/packfile.h
+++ b/packfile.h
@@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
unsigned flags);
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags);
/*
* Iterate through all packed objects in the given packfile store and invoke
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v3 00/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (15 preceding siblings ...)
2026-01-20 15:25 ` [PATCH v2 " Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-21 12:50 ` [PATCH v3 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
` (14 more replies)
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
17 siblings, 15 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Hi,
this patch series introduces a generic `odb_for_each_object()` function
to iterate through objects and adapts callers to use it. The intent is
to make iteration through objects independent of the actual storage
backend.
The series is structured as follows:
- Commits 1 to 2 do some cleanups for the for-each-object flags.
- Commits 3 to 7 introduce the infrastructure for
`odb_for_each_object()`.
- Commits 8 to 13 convert a couple of callers to use the new
interfaces.
- Commit 14 drops now-unused functions.
The patch series is built on top of 8745eae506 (The 17th batch,
2026-01-11) with the following two series merged into it:
- ps/read-object-info-improvements at a282a8f163 (packfile: move MIDX
into packfile store, 2026-01-09).
- ps/packfile-store-in-odb-source at 12d3b58b55 (packfile: drop
repository parameter from `packed_object_info()`, 2026-01-12).
Changes in v3:
- Fix error code propagation in last commit.
- Link to v2: https://lore.kernel.org/r/20260120-pks-odb-for-each-object-v2-0-d05cbfd3d6f8@pks.im
Changes in v2:
- Clarify the comment of `odb_for_each_object()` to point out that
it's the callback that can abort iteration by returning a non-zero
error code.
- Document in the commit message that we don't yet convert all sites
to use `odb_for_each_object()`.
- Link to v1: https://lore.kernel.org/r/20260115-pks-odb-for-each-object-v1-0-5418a91d5d99@pks.im
Thanks!
Patrick
---
Patrick Steinhardt (14):
odb: rename `FOR_EACH_OBJECT_*` flags
odb: fix flags parameter to be unsigned
object-file: extract function to read object info from path
object-file: introduce function to iterate through objects
packfile: extract function to iterate through objects of a store
packfile: introduce function to iterate through objects
odb: introduce `odb_for_each_object()`
builtin/fsck: refactor to use `odb_for_each_object()`
treewide: enumerate promisor objects via `odb_for_each_object()`
treewide: drop uses of `for_each_{loose,packed}_object()`
odb: introduce mtime fields for object info requests
builtin/pack-objects: use `packfile_store_for_each_object()`
reachable: convert to use `odb_for_each_object()`
odb: drop unused `for_each_{loose,packed}_object()` functions
builtin/cat-file.c | 30 +++++++--
builtin/fsck.c | 57 ++++------------
builtin/pack-objects.c | 47 ++++++-------
commit-graph.c | 46 +++++++++----
object-file.c | 120 +++++++++++++++++++++------------
object-file.h | 21 +++---
odb.c | 29 ++++++++
odb.h | 43 ++++++++++--
packfile.c | 180 +++++++++++++++++++++++++++++++++----------------
packfile.h | 18 ++++-
reachable.c | 129 ++++++++++-------------------------
repack-promisor.c | 8 +--
revision.c | 10 ++-
13 files changed, 426 insertions(+), 312 deletions(-)
Range-diff versus v2:
1: 3cd6a9b898 = 1: f931af359e odb: rename `FOR_EACH_OBJECT_*` flags
2: 2b9a766928 = 2: 4454d3b8e6 odb: fix flags parameter to be unsigned
3: e5a8257291 = 3: 0953291ffc object-file: extract function to read object info from path
4: 309fb50d2a = 4: b0a8ff2d9d object-file: introduce function to iterate through objects
5: 8332af532d = 5: def018bbca packfile: extract function to iterate through objects of a store
6: 17675561dc = 6: caccd45aa0 packfile: introduce function to iterate through objects
7: aa79e2f2ea = 7: 4e429e52b2 odb: introduce `odb_for_each_object()`
8: 33737e286b = 8: 8f16adec2c builtin/fsck: refactor to use `odb_for_each_object()`
9: 606b944a67 = 9: a1c95ffc4f treewide: enumerate promisor objects via `odb_for_each_object()`
10: bf31434259 = 10: c0ecc5517e treewide: drop uses of `for_each_{loose,packed}_object()`
11: 359ac505ae = 11: 1687ac9f3c odb: introduce mtime fields for object info requests
12: eb7c6f5571 = 12: 1d4b35e3a5 builtin/pack-objects: use `packfile_store_for_each_object()`
13: 80227f4d71 = 13: f360ff980a reachable: convert to use `odb_for_each_object()`
14: b614e33feb ! 14: bbad8b1a2b odb: drop unused `for_each_{loose,packed}_object()` functions
@@ packfile.c: int packfile_store_for_each_object(struct packfile_store *store,
+ ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
+ &data, flags);
+ if (ret)
-+ break;
++ goto out;
+ }
+
-+ store->skip_mru_updates = false;
++ ret = 0;
- return pack_errors ? -1 : 0;
+- return pack_errors ? -1 : 0;
++out:
++ store->skip_mru_updates = false;
++
++ if (!ret && pack_errors)
++ ret = -1;
++ return ret;
}
+
+ struct add_promisor_object_data {
## packfile.h ##
@@ packfile.h: typedef int each_packed_object_fn(const struct object_id *oid,
---
base-commit: 1ff0e42d332523a11cc3d61b8d8463db5f9f14e8
change-id: 20260115-pks-odb-for-each-object-60b78cde09fd
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 01/14] odb: rename `FOR_EACH_OBJECT_*` flags
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-21 12:50 ` [PATCH v3 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
` (13 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This
prepares us for a new upcoming `odb_for_each_object()` function and
ensures that both the function and its flags have the same prefix.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 2 +-
builtin/pack-objects.c | 10 +++++-----
commit-graph.c | 4 ++--
object-file.c | 4 ++--
object-file.h | 2 +-
odb.h | 13 +++++++------
packfile.c | 20 ++++++++++----------
packfile.h | 4 ++--
reachable.c | 8 ++++----
repack-promisor.c | 2 +-
revision.c | 2 +-
11 files changed, 36 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 2ad712e9f8..6964a5a52c 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
batch_each_object(opt, batch_unordered_object,
- FOR_EACH_OBJECT_PACK_ORDER, &cb);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6ee31d48c9..74317051fd 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
strbuf_release(&buf);
@@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void)
if (for_each_packed_object(to_pack.repo,
add_object_in_unpacked_pack,
NULL,
- FOR_EACH_OBJECT_PACK_ORDER |
- FOR_EACH_OBJECT_LOCAL_ONLY |
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
die(_("cannot open pack index"));
}
diff --git a/commit-graph.c b/commit-graph.c
index 6b1f02e179..7f1145a082 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
goto cleanup;
}
for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
for_each_packed_object(ctx->r, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
diff --git a/object-file.c b/object-file.c
index e7e4c3348f..64e9e239dc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
struct odb_source *source;
@@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb,
if (r)
return r;
- if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
break;
}
diff --git a/object-file.h b/object-file.h
index 1229d5f675..42bb50e10c 100644
--- a/object-file.h
+++ b/object-file.h
@@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
*/
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
/**
diff --git a/odb.h b/odb.h
index bab07755f4..74503addf1 100644
--- a/odb.h
+++ b/odb.h
@@ -442,24 +442,25 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for for_each_*_object(). */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
enum {
diff --git a/packfile.c b/packfile.c
index 402c3b5dc7..b65f0b43f1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
uint32_t i;
int r = 0;
- if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) {
if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p,
* - in pack-order, it is pack position, which we must
* convert to an index position in order to get the oid.
*/
- if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER)
index_pos = pack_pos_to_index(p, i);
else
index_pos = i;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags)
+ void *data, enum odb_for_each_object_flags flags)
{
struct odb_source *source;
int r = 0;
@@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
struct packed_git *p = e->pack;
- if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
@@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (repo_has_promisor_remote(r)) {
for_each_packed_object(r, add_promisor_object,
&promisor_objects,
- FOR_EACH_OBJECT_PROMISOR_ONLY |
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/packfile.h b/packfile.h
index acc5c55ad5..15551258bd 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags);
+ void *data, enum odb_for_each_object_flags flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
diff --git a/reachable.c b/reachable.c
index 4b532039d5..82676b2668 100644
--- a/reachable.c
+++ b/reachable.c
@@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum for_each_object_flags flags;
+ enum odb_for_each_object_flags flags;
int r;
data.revs = revs;
@@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
data.extra_recent_oids_loaded = 0;
r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- FOR_EACH_OBJECT_LOCAL_ONLY);
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
if (r)
goto done;
- flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
+ flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
- flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
+ flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
diff --git a/repack-promisor.c b/repack-promisor.c
index ee6e0669f6..45c330b9a5 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo,
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
for_each_packed_object(repo, write_oid, &ctx,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index b65a763770..5aadf46dac 100644
--- a/revision.c
+++ b/revision.c
@@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs)
if (revs->exclude_promisor_objects) {
for_each_packed_object(revs->repo, mark_uninteresting, revs,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
}
if (!revs->reflog_info)
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-21 12:50 ` [PATCH v3 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-21 21:11 ` Jeff King
2026-01-21 12:50 ` [PATCH v3 03/14] object-file: extract function to read object info from path Patrick Steinhardt
` (12 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
The `flags` parameter accepted by various `for_each_object()` functions
is a bitfield of multiple flags. Such parameters are typically unsigned
in the Git codebase, but we use `enum odb_for_each_object_flags` in
some places.
Adapt these function signatures to use the correct type.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 3 ++-
object-file.h | 3 ++-
packfile.c | 4 ++--
packfile.h | 4 ++--
4 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/object-file.c b/object-file.c
index 64e9e239dc..8fa461dd59 100644
--- a/object-file.c
+++ b/object-file.c
@@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags)
+ struct object_info *oi,
+ unsigned flags)
{
int ret;
int fd;
diff --git a/object-file.h b/object-file.h
index 42bb50e10c..2acf19fb91 100644
--- a/object-file.h
+++ b/object-file.h
@@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags);
+ struct object_info *oi,
+ unsigned flags);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
diff --git a/packfile.c b/packfile.c
index b65f0b43f1..79fe64a25b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
+ unsigned flags)
{
uint32_t i;
int r = 0;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags)
+ void *data, unsigned flags)
{
struct odb_source *source;
int r = 0;
diff --git a/packfile.h b/packfile.h
index 15551258bd..447c44c4a7 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum odb_for_each_object_flags flags);
+ unsigned flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags);
+ void *data, unsigned flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-21 12:50 ` [PATCH v3 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
@ 2026-01-21 21:11 ` Jeff King
2026-01-22 0:00 ` Taylor Blau
2026-01-22 6:50 ` Patrick Steinhardt
0 siblings, 2 replies; 120+ messages in thread
From: Jeff King @ 2026-01-21 21:11 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:18PM +0100, Patrick Steinhardt wrote:
> The `flags` parameter accepted by various `for_each_object()` functions
> is a bitfield of multiple flags. Such parameters are typically unsigned
> in the Git codebase, but we use `enum odb_for_each_object_flags` in
> some places.
I agree that using "unsigned" instead of "int" for flags is a good
practice in general. But isn't using "unsigned" instead of an enum
strictly worse?
The enum is more descriptive to human readers (since the type defines
which flags we expect to see). And it lets the compiler use the correct
type in the few cases where it might matter. E.g., if you imagine an
enum that defines 40 bits, then the compiler will know that it needs to
use a type larger than 32 bits to store it. Whereas passing a raw
"unsigned" will truncate some values.
I don't expect this latter reason to be common, but if we are going to
have a general principle for how to pass flags, it feels like passing
the enum (assuming the flags are defined in one) is always better. And
IMHO just the first reason (human readers) makes it worth doing that way
anyway.
You can find this pattern in lots of places (try grepping for "enum
[a-z_]* flag"). The ones that aren't are typically using flags that are
not using enums at all (just #defines).
> diff --git a/object-file.c b/object-file.c
> index 64e9e239dc..8fa461dd59 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
>
> int odb_source_loose_read_object_info(struct odb_source *source,
> const struct object_id *oid,
> - struct object_info *oi, int flags)
> + struct object_info *oi,
> + unsigned flags)
So I'd argue this should be switching to the enum...
> diff --git a/packfile.h b/packfile.h
> index 15551258bd..447c44c4a7 100644
> --- a/packfile.h
> +++ b/packfile.h
> @@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
> void *data);
> int for_each_object_in_pack(struct packed_git *p,
> each_packed_object_fn, void *data,
> - enum odb_for_each_object_flags flags);
> + unsigned flags);
> int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
> - void *data, enum odb_for_each_object_flags flags);
> + void *data, unsigned flags);
..and these should be left untouched.
-Peff
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-21 21:11 ` Jeff King
@ 2026-01-22 0:00 ` Taylor Blau
2026-01-22 15:41 ` Junio C Hamano
2026-01-22 6:50 ` Patrick Steinhardt
1 sibling, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 0:00 UTC (permalink / raw)
To: Jeff King
Cc: Patrick Steinhardt, git, Karthik Nayak, Justin Tobler,
Junio C Hamano
On Wed, Jan 21, 2026 at 04:11:28PM -0500, Jeff King wrote:
> On Wed, Jan 21, 2026 at 01:50:18PM +0100, Patrick Steinhardt wrote:
>
> > The `flags` parameter accepted by various `for_each_object()` functions
> > is a bitfield of multiple flags. Such parameters are typically unsigned
> > in the Git codebase, but we use `enum odb_for_each_object_flags` in
> > some places.
>
> I agree that using "unsigned" instead of "int" for flags is a good
> practice in general. But isn't using "unsigned" instead of an enum
> strictly worse?
I agree with you that we should be using an enum in these cases over
unsigned for the reasons you suggest. I've stumbled over this in the
past, so perhaps this is worth adding to the CodingGuidelines?
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-22 0:00 ` Taylor Blau
@ 2026-01-22 15:41 ` Junio C Hamano
2026-01-22 19:23 ` Jeff King
0 siblings, 1 reply; 120+ messages in thread
From: Junio C Hamano @ 2026-01-22 15:41 UTC (permalink / raw)
To: Taylor Blau
Cc: Jeff King, Patrick Steinhardt, git, Karthik Nayak, Justin Tobler
Taylor Blau <me@ttaylorr.com> writes:
> I agree with you that we should be using an enum in these cases over
> unsigned for the reasons you suggest. I've stumbled over this in the
> past, so perhaps this is worth adding to the CodingGuidelines?
I am OK with declaring our preference of "enum" over "#define"d
constants. The only two minor hesitations I have against the use of
"enum", especially for bitset but not for enumeration, are that
(1) enum gives a false sense of type safety to casual coders. If I
have two enum types and pass one as a parameter to a
function that expects the other one, would the compiler help me
catch that as a potential mistake? -Wenum-conversion is not
enabled even with -Wall so I am assuming that the compiler
folks feel that it is not reliable enough.
(2) it is not easy to force an enum type to be unsigned, unless you
are at C23 or above. If shifting enums are warned by the
compilers by default, I wouldn't worry about it, but use of
unsigned is more explicit in this regard.
Use of enum does help debuggers, as gcc figures out that three and
tres are both (ONEBIT | TWOBIT) when asked to print it in the
following snippet.
enum bits {
ONEBIT = (1 << 0),
TWOBIT = (1 << 1),
};
int main(int ac, char **av)
{
enum bits one = ONEBIT;
enum bits two = TWOBIT;
enum bits three = one | two;
enum bits tres = 3;
...
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-22 15:41 ` Junio C Hamano
@ 2026-01-22 19:23 ` Jeff King
2026-01-23 10:57 ` Patrick Steinhardt
2026-01-26 22:32 ` Junio C Hamano
0 siblings, 2 replies; 120+ messages in thread
From: Jeff King @ 2026-01-22 19:23 UTC (permalink / raw)
To: Junio C Hamano
Cc: Taylor Blau, Patrick Steinhardt, git, Karthik Nayak,
Justin Tobler
On Thu, Jan 22, 2026 at 07:41:51AM -0800, Junio C Hamano wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > I agree with you that we should be using an enum in these cases over
> > unsigned for the reasons you suggest. I've stumbled over this in the
> > past, so perhaps this is worth adding to the CodingGuidelines?
>
> I am OK with declaring our preference of "enum" over "#define"d
> constants. The only two minor hesitation I have against the use of
> "enum", especially for bitset but not for enumeration, are that
I don't think there's any disagreement over using enums in general. It's
just a question of what type to declare in function interfaces.
> (1) enum gives a false sense of type safety to casual coders. If I
> have two enum types and pass one to as a parameter to a
> function that expects the other one, would the compiler help me
> catch that as a potential mistake? -Wenum-conversion is not
> enabled even with -Wall so I am assuming that the compiler
> folks fells that it is not reliable enough.
It is enabled with -Wextra, which we turn on with DEVELOPER=1. I think
gcc will catch the most obvious mismatches like:
enum one { FOO };
enum two { BAR };
void func(enum one value);
void doit(void) { func(BAR); }
which yields:
$ gcc -c -Wall -Wextra foo.c
foo.c: In function ‘doit’:
foo.c:4:24: warning: implicit conversion from ‘enum two’ to ‘enum one’ [-Wenum-conversion]
4 | void doit(void) { func(BAR); }
| ^~~
What it doesn't help with is passing arbitrary integers, which includes
#define'd constants. Swapping out "enum two" for:
#define BAR 1
will not produce a warning. That's the issue that I ran into with the
color code in:
https://lore.kernel.org/git/20250916202748.GM612873@coredump.intra.peff.net/
Unfortunately bit operations on enum values seem to lose the "type" for
the purposes of this warning, and just become regular integers. So if we
modify our example to:
enum one { FOO_A = 1 << 0, FOO_B = 1 << 1 };
enum two { BAR_A = 1 << 0, BAR_B = 1 << 1 };
void func(enum one value);
void doit(void) { func(BAR_A | BAR_B); }
it no longer complains.
I still think we are better off declaring the flag parameters with the
enum type, though. It will catch some problematic cases. And even if
there were no compiler support at all, I think the hint to humans about
the expected type is worth it.
> (2) it is not easy to force an enum type to be unsigned, unless you
> are at C23 or above. If shifting enums are warned by the
> compilers by default, I wouldn't worry about it, but use of
> unsigned is more explicit in this regard.
Do we need to force unsignedness for bit-flags? The compiler will use a
type that is sufficiently large for the enum values defined, and I would
not expect anybody to shift them. Only to construct them with bitwise-OR
and check them with bitwise-AND.
-Peff
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-22 19:23 ` Jeff King
@ 2026-01-23 10:57 ` Patrick Steinhardt
2026-01-26 22:32 ` Junio C Hamano
1 sibling, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-23 10:57 UTC (permalink / raw)
To: Jeff King; +Cc: Junio C Hamano, Taylor Blau, git, Karthik Nayak, Justin Tobler
On Thu, Jan 22, 2026 at 02:23:37PM -0500, Jeff King wrote:
> On Thu, Jan 22, 2026 at 07:41:51AM -0800, Junio C Hamano wrote:
>
> > Taylor Blau <me@ttaylorr.com> writes:
> >
> > > I agree with you that we should be using an enum in these cases over
> > > unsigned for the reasons you suggest. I've stumbled over this in the
> > > past, so perhaps this is worth adding to the CodingGuidelines?
> >
> > I am OK with declaring our preference of "enum" over "#define"d
> > constants. The only two minor hesitation I have against the use of
> > "enum", especially for bitset but not for enumeration, are that
>
> I don't think there's any disagreement over using enums in general. It's
> just a question of what type to declare in function interfaces.
>
> > (1) enum gives a false sense of type safety to casual coders. If I
> > have two enum types and pass one to as a parameter to a
> > function that expects the other one, would the compiler help me
> > catch that as a potential mistake? -Wenum-conversion is not
> > enabled even with -Wall so I am assuming that the compiler
> > folks fells that it is not reliable enough.
>
> It is enabled with -Wextra, which we turn on with DEVELOPER=1. I think
> gcc will catch the most obvious mismatches like:
>
> enum one { FOO };
> enum two { BAR };
> void func(enum one value);
> void doit(void) { func(BAR); }
>
> which yields:
>
> $ gcc -c -Wall -Wextra foo.c
> foo.c: In function ‘doit’:
> foo.c:4:24: warning: implicit conversion from ‘enum two’ to ‘enum one’ [-Wenum-conversion]
> 4 | void doit(void) { func(BAR); }
> | ^~~
>
> What it doesn't help with is passing arbitrary integers, which includes
> #define'd constants. Swapping out "enum two" for:
>
> #define BAR 1
>
> will not produce a warning. That's the issue that I ran into with the
> color code in:
>
> https://lore.kernel.org/git/20250916202748.GM612873@coredump.intra.peff.net/
>
> Unfortunately bit operations on enum values seem to lose the "type" for
> the purposes of this warning, and just become regular integers. So if we
> modify our example to:
>
> enum one { FOO_A = 1 << 0, FOO_B = 1 << 1 };
> enum two { BAR_A = 1 << 0, BAR_B = 1 << 1 };
> void func(enum one value);
> void doit(void) { func(BAR_A | BAR_B); }
>
> it no longer complains.
>
> I still think we are better off declaring the flag parameters with the
> enum type, though. It will catch some problematic cases. And even if
> there were no compiler support at all, I think the hint to humans about
> the expected type is worth it.
I don't care strongly enough myself, but do you or Taylor maybe want to
send a patch that documents our preference? If so I'll be happy to adapt
my series to use whatever style we agree on.
Thanks!
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-22 19:23 ` Jeff King
2026-01-23 10:57 ` Patrick Steinhardt
@ 2026-01-26 22:32 ` Junio C Hamano
1 sibling, 0 replies; 120+ messages in thread
From: Junio C Hamano @ 2026-01-26 22:32 UTC (permalink / raw)
To: Jeff King
Cc: Taylor Blau, Patrick Steinhardt, git, Karthik Nayak,
Justin Tobler
Jeff King <peff@peff.net> writes:
> I don't think there's any disagreement over using enums in general. It's
> just a question of what type to declare in function interfaces.
>
>> (1) enum gives a false sense of type safety to casual coders. If I
>> have two enum types and pass one to as a parameter to a
>> function that expects the other one, would the compiler help me
>> catch that as a potential mistake? -Wenum-conversion is not
>> enabled even with -Wall so I am assuming that the compiler
>> folks fells that it is not reliable enough.
>
> It is enabled with -Wextra, which we turn on with DEVELOPER=1. I think
> gcc will catch the most obvious mismatches like:
>
> enum one { FOO };
> enum two { BAR };
> void func(enum one value);
> void doit(void) { func(BAR); }
>
> which yields:
>
> $ gcc -c -Wall -Wextra foo.c
> foo.c: In function ‘doit’:
> foo.c:4:24: warning: implicit conversion from ‘enum two’ to ‘enum one’ [-Wenum-conversion]
> 4 | void doit(void) { func(BAR); }
> | ^~~
This is good. I think we just saw a potential use of this feature
in Patrick's topic to turn a #define to an enum in <odb.h>.
>> (2) it is not easy to force an enum type to be unsigned, unless you
>> are at C23 or above. If shifting enums are warned by the
>> compilers by default, I wouldn't worry about it, but use of
>> unsigned is more explicit in this regard.
>
> Do we need to force unsignedness for bit-flags? The compiler will use a
> type that is sufficiently large for the enum values defined, and I would
> not expect anybody to shift them.
Yes, as long as nobody shifts, it does not matter. It's just that not
having to worry about it trumps having to declare that we would
immediately notice if anybody does something strange like that ;-)
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-21 21:11 ` Jeff King
2026-01-22 0:00 ` Taylor Blau
@ 2026-01-22 6:50 ` Patrick Steinhardt
2026-01-22 23:44 ` Taylor Blau
1 sibling, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-22 6:50 UTC (permalink / raw)
To: Jeff King; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 04:11:28PM -0500, Jeff King wrote:
> On Wed, Jan 21, 2026 at 01:50:18PM +0100, Patrick Steinhardt wrote:
>
> > The `flags` parameter accepted by various `for_each_object()` functions
> > is a bitfield of multiple flags. Such parameters are typically unsigned
> > in the Git codebase, but we use `enum odb_for_each_object_flags` in
> > some places.
>
> I agree that using "unsigned" instead of "int" for flags is a good
> practice in general. But isn't using "unsigned" instead of an enum
> strictly worse?
>
> The enum is more descriptive to human readers (since the type defines
> which flags we expect to see). And it lets the compiler use the correct
> type in the few cases where it might matter. E.g., if you imagine an
> enum that defines 40 bits, then the compiler will know that it needs to
> use a type larger than 32 bits to store it. Whereas passing a raw
> "unsigned" will truncate some values.
>
> I don't expect this latter reason to be common, but if we are going to
> have a general principle for how to pass flags, it feels like passing
> the enum (assuming the flags are defined in one) is always better. And
> IMHO just the first reason (human readers) makes it worth doing that way
> anyway.
I'd agree if we used the enum as a plain value directly. But in case
we're using it as a bitset I think it muddies the waters a bit, and I
had the understanding that we typically want to use `unsigned` for
flag bitsets like this.
I think the reason I'm a bit torn is that I'm not a huge fan of having
enum values that don't fall into the range of valid enums. It's valid C
of course, but it just smells weird to me.
> You can find this pattern in lots of places (try grepping for "enum
> [a-z_]* flag"). The ones that aren't are typically using flags that are
> not using enums at all (just #defines).
True, but `unsigned flags` is way more common:
$ git grep 'unsigned flags' | wc -l
219
$ git grep 'enum [a-z_]* flag' | wc -l
56
In any case, I don't feel too strongly about all of this. I'm happy to
adapt if there is general consensus that we want to use enums instead,
but if so I'd like us to document this in our coding guidelines.
Thanks!
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 02/14] odb: fix flags parameter to be unsigned
2026-01-22 6:50 ` Patrick Steinhardt
@ 2026-01-22 23:44 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 23:44 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: Jeff King, git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 07:50:58AM +0100, Patrick Steinhardt wrote:
> > You can find this pattern in lots of places (try grepping for "enum
> > [a-z_]* flag"). The ones that aren't are typically using flags that are
> > not using enums at all (just #defines).
>
> True, but `unsigned flags` is way more common:
>
> $ git grep 'unsigned flags' | wc -l
> 219
>
> $ git grep 'enum [a-z_]* flag' | wc -l
> 56
Sure, though I think the convention can/should evolve where it makes
sense. I tend to agree with Peff earlier in this thread that enum flags
are preferable to unsigned ones for the reasons he laid out. I don't
think we should go and proactively convert the 219 instances of
"unsigned flags", but for new code I think we should prefer enum flags.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 03/14] object-file: extract function to read object info from path
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
2026-01-21 12:50 ` [PATCH v3 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
2026-01-21 12:50 ` [PATCH v3 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-22 0:04 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
` (11 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Extract a new function that allows us to read object info for a specific
loose object via a user-supplied path. This function will be used in a
subsequent commit.
Note that this also allows us to drop `stat_loose_object()`, which is
a simple wrapper around `odb_loose_path()` plus lstat(3p).
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 39 ++++++++++++++++-----------------------
1 file changed, 16 insertions(+), 23 deletions(-)
diff --git a/object-file.c b/object-file.c
index 8fa461dd59..a651129426 100644
--- a/object-file.c
+++ b/object-file.c
@@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
}
/*
- * Find "oid" as a loose object in given source.
- * Returns 0 on success, negative on failure.
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to stat_loose_object().
*/
-static int stat_loose_object(struct odb_source_loose *loose,
- const struct object_id *oid,
- struct stat *st, const char **path)
-{
- static struct strbuf buf = STRBUF_INIT;
-
- *path = odb_loose_path(loose->source, &buf, oid);
- if (!lstat(*path, st))
- return 0;
-
- return -1;
-}
-
-/*
- * Like stat_loose_object(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
@@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
-int odb_source_loose_read_object_info(struct odb_source *source,
+static int read_object_info_from_path(struct odb_source *source,
+ const char *path,
const struct object_id *oid,
struct object_info *oi,
unsigned flags)
@@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source,
int ret;
int fd;
unsigned long mapsize;
- const char *path;
void *map = NULL;
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
@@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
+ if (lstat(path, &st) < 0) {
ret = -1;
goto out;
}
@@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- fd = open_loose_object(source->loose, oid, &path);
+ fd = git_open(path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
@@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
return ret;
}
+int odb_source_loose_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ odb_loose_path(source, &buf, oid);
+ return read_object_info_from_path(source, buf.buf, oid, oi, flags);
+}
+
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 03/14] object-file: extract function to read object info from path
2026-01-21 12:50 ` [PATCH v3 03/14] object-file: extract function to read object info from path Patrick Steinhardt
@ 2026-01-22 0:04 ` Taylor Blau
2026-01-22 6:51 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 0:04 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:19PM +0100, Patrick Steinhardt wrote:
> Extract a new function that allows us to read object info for a specific
> loose object via a user-supplied path. This function will be used in a
> subsequent commit.
I think that I'm a tad unsure of this interface. I understand that for
the existing object storage mechanism that having a path makes sense:
loose objects are stored in files which are referenced by their path.
But this feels like a leaky abstraction to me. If we are dealing with an
object store implementation that uses entries in a database, or
arbitrary blob storage, do they have an equivalent concept of "path"?
Perhaps this is clear later on in the series, but I think at this point
I am a little unclear of the direction.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 03/14] object-file: extract function to read object info from path
2026-01-22 0:04 ` Taylor Blau
@ 2026-01-22 6:51 ` Patrick Steinhardt
2026-01-22 23:47 ` Taylor Blau
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-22 6:51 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 07:04:02PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:19PM +0100, Patrick Steinhardt wrote:
> > Extract a new function that allows us to read object info for a specific
> > loose object via a user-supplied path. This function will be used in a
> > subsequent commit.
>
> I think that I'm a tad unsure of this interface. I understand that for
> the existing object storage mechanism that having a path makes sense:
> loose objects are stored in files which are referenced by their path.
>
> But this feels like a leaky abstraction to me. If we are dealing with an
> object store implementation that uses entries in a database, or
> arbitrary blob storage, do they have an equivalent concept of "path"?
It is leaky indeed, but that should be fine given that it's local to the
loose object backend anyway. So no other object storage format uses or
even sees it.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 03/14] object-file: extract function to read object info from path
2026-01-22 6:51 ` Patrick Steinhardt
@ 2026-01-22 23:47 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 23:47 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 07:51:41AM +0100, Patrick Steinhardt wrote:
> On Wed, Jan 21, 2026 at 07:04:02PM -0500, Taylor Blau wrote:
> > On Wed, Jan 21, 2026 at 01:50:19PM +0100, Patrick Steinhardt wrote:
> > > Extract a new function that allows us to read object info for a specific
> > > loose object via a user-supplied path. This function will be used in a
> > > subsequent commit.
> >
> > I think that I'm a tad unsure of this interface. I understand that for
> > the existing object storage mechanism that having a path makes sense:
> > loose objects are stored in files which are referenced by their path.
> >
> > But this feels like a leaky abstraction to me. If we are dealing with an
> > object store implementation that uses entries in a database, or
> > arbitrary blob storage, do they have an equivalent concept of "path"?
>
> It is leaky indeed, but that should be fine given that it's local to the
> loose object backend anyway. So no other object storage format uses or
> even sees it.
If it's local to the loose object backend then I agree it's OK here.
I think I was unclear that was the case since I saw "path" being used in
conjunction with the generic "odb_source" type.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 04/14] object-file: introduce function to iterate through objects
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (2 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 03/14] object-file: extract function to read object info from path Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-22 0:15 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
` (10 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have multiple divergent interfaces to iterate through objects of a
specific backend:
- `for_each_loose_object()` yields all loose objects.
- `for_each_packed_object()` (somewhat obviously) yields all packed
objects.
These functions have different function signatures, which makes it hard
to create a common abstraction layer that covers both of these.
Introduce a new function `odb_source_loose_for_each_object()` to plug
this gap. This function doesn't take any data specific to loose objects,
but instead it accepts a `struct object_info` that will be populated the
exact same as if `odb_source_loose_read_object_info()` was called.
The benefit of this new interface is that we can continue to pass
backend-specific data, as `struct object_info` contains a union for
these exact use cases. This will allow us to unify how we iterate
through objects across both loose and packed objects in a subsequent
commit.
The `for_each_loose_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++
object-file.h | 11 +++++++++++
odb.h | 12 ++++++++++++
3 files changed, 64 insertions(+)
diff --git a/object-file.c b/object-file.c
index a651129426..65e730684b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1801,6 +1801,47 @@ int for_each_loose_object(struct object_database *odb,
return 0;
}
+struct for_each_object_wrapper_data {
+ struct odb_source *source;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+ const char *path,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
+ if (data->oi &&
+ read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
+ return -1;
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+
+ /* There are no loose promisor objects, so we can return immediately. */
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+ return 0;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+ return 0;
+
+ return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+ NULL, NULL, &data);
+}
+
static int append_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
diff --git a/object-file.h b/object-file.h
index 2acf19fb91..048b778531 100644
--- a/object-file.h
+++ b/object-file.h
@@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
enum odb_for_each_object_flags flags);
+/*
+ * Iterate through all loose objects in the given object database source and
+ * invoke the callback function for each of them. If given, the object info
+ * will be populated with the object's data as if you had called
+ * `odb_source_loose_read_object_info()` on the object.
+ */
+int odb_source_loose_for_each_object(struct odb_source *source,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/**
* format_object_header() is a thin wrapper around s xsnprintf() that
diff --git a/odb.h b/odb.h
index 74503addf1..f97f249580 100644
--- a/odb.h
+++ b/odb.h
@@ -463,6 +463,18 @@ enum odb_for_each_object_flags {
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated the same as if you would call
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 04/14] object-file: introduce function to iterate through objects
2026-01-21 12:50 ` [PATCH v3 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-22 0:15 ` Taylor Blau
2026-01-22 6:52 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 0:15 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:20PM +0100, Patrick Steinhardt wrote:
> Introduce a new function `odb_source_loose_for_each_object()` to plug
> this gap. This function doesn't take any data specific to loose objects,
> but instead it accepts a `struct object_info` that will be populated the
> exact same as if `odb_source_loose_read_object()` was called.
This may be a bit of a tangent, but I wonder if we are over-applying the
function prefixing convention.
In general I am really happy with this convention, and it yields
organized headers where functions are clearly grouped by what structure
they operate on. But I have noticed a handful of times where we replaced
a very concise function name with a longer prefixed version.
I think I don't have a clear sense of what the benefit of prefixing is
in this particular instance. Supposing for a moment that we don't have
an existing for_each_loose_object() function (which I think is the
end-state of this series). What does the name
"odb_source_loose_for_each_object()" convey that
"for_each_loose_object()" does not?
I think if there were multiple ways to iterate over loose objects, it
makes a lot of sense to prefix them such that they are grouped to avoid
mixing interfaces or using one API when you meant to call another. But
my understanding is that the intent here is to consolidate all of the
different ways to iterate over objects which live in different
odb_source implementations opaque to the caller. As a result, what other
way exists to iterate over loose objects?
Another aspect of this is how approachable the function is to newcomers.
On the one hand, I can see an argument that prefixing makes it clear
which functions belong together, and so if a newcomer is familiar with
the concept of ODB sources, then they should reasonably expect that a
function to iterate over loose objects would begin with "odb_source_".
But on the other hand, while a newcomer may be familiar with the basics
of Git's object model enough to understand the distinction between loose
and packed objects, they may not be familiar with the concept of an ODB
source. In that case, the prefix makes it somewhat more difficult to
find the right function to use.
I think there is a reasonable argument towards prefixing in the case
that we want to link against this function from outside of Git. But
AFAIK that is not likely to happen in the near future. So in the interim
I think we are left with function names which are a little more verbose
than the ones they are replacing without a clear benefit.
To be clear, I am generally in favor of this convention and have been
applying it myself especially when splitting out the repack builtin
implementation into their own compilation units. But I wonder if we
could relax the convention in cases like these without sacrificing
clarity/organization.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 04/14] object-file: introduce function to iterate through objects
2026-01-22 0:15 ` Taylor Blau
@ 2026-01-22 6:52 ` Patrick Steinhardt
2026-01-23 0:01 ` Taylor Blau
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-22 6:52 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 07:15:16PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:20PM +0100, Patrick Steinhardt wrote:
> > Introduce a new function `odb_source_loose_for_each_object()` to plug
> > this gap. This function doesn't take any data specific to loose objects,
> > but instead it accepts a `struct object_info` that will be populated the
> > exact same as if `odb_source_loose_read_object()` was called.
>
> This may be a bit of a tangent, but I wonder if we are over-applying the
> function prefixing convention.
>
> In general I am really happy with this convention, and it yields
> organized headers where functions are clearly grouped by what structure
> they operate on. But I have noticed a handful of times where we replaced
> a very concise function name with a longer prefixed version.
>
> I think I don't have a clear sense of what the benefit of prefixing is
> in this particular instance. Supposing for a moment that we don't have
> an existing for_each_loose_object() function (which I think is the
> end-state of this series). What does the name
> "odb_source_loose_for_each_object()" convey that
> "for_each_loose_object()" does not?
As you say further down, it makes it easy to see that it's a function
that belongs to `struct odb_source_loose`. It immediately gives the
reader a sense of which main structure it belongs to, which provides
scope and makes LSPs work better because of the common prefix.
> I think if there were multiple ways to iterate over loose objects, it
> makes a lot of sense to prefix them such that they are grouped to avoid
> mixing interfaces or using one API when you meant to call another. But
> my understanding is that the intent here is to consolidate all of the
> different ways to iterate over objects which live in different
> odb_source implementations opaque to the caller. As a result, what other
> way exists to iterate over loose objects?
There will be more to come: iterating over objects with a prefix, for
example. In general, this series is taking a layered approach:
- `odb_for_each_object()` is the high-level function that users should
use if possible. It is part of the ODB layer and abstracts away
details about the ODB sources.
- `odb_source_for_each_object()` will be introduced in the next patch
series. It allows the user to take an ODB source and iterate over
its contained objects, regardless of what the backend is.
- `odb_source_loose_for_each_object()` is the low-level implementation
for one specific backend. We also have equivalent functions for the
other backends, like for example for packed objects.
The longer the function name, the more specific the logic becomes. Sure,
eventually it becomes a mouthful, but ideally users wouldn't have to
ever interact with the low-level details at all.
> Another aspect of this is how approachable the function is to newcomers.
> On the one hand, I can see an argument that prefixing makes it clear
> which functions belong together, and so if a newcomer is familiar with
> the concept of ODB sources, then they should reasonably expect that a
> function to iterate over loose objects would begin with "odb_source_".
>
> But on the other hand, while a newcomer may be familiar with the basics
> of Git's object model enough to understand the distinction between loose
> and packed objects, they may not be familiar with the concept of an ODB
> source. In that case, the prefix makes it somewhat more difficult to
> find the right function to use.
So I would claim that this is even intentional. If a reader is not aware
of what an ODB source is, then chances are high that using the function
that iterates through one specific source is the wrong thing to do. They
should rather use `odb_for_each_object()` in that case, which is the
higher-level interface that doesn't require the reader to know about ODB
sources in the first place.
I kind of see this as "guiding" the reader and giving them some hints
about what the preferred interface is.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 04/14] object-file: introduce function to iterate through objects
2026-01-22 6:52 ` Patrick Steinhardt
@ 2026-01-23 0:01 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 0:01 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 07:52:00AM +0100, Patrick Steinhardt wrote:
> > I think if there were multiple ways to iterate over loose objects, it
> > makes a lot of sense to prefix them such that they are grouped to avoid
> > mixing interfaces or using one API when you meant to call another. But
> > my understanding is that the intent here is to consolidate all of the
> > different ways to iterate over objects which live in different
> > odb_source implementations opaque to the caller. As a result, what other
> > way exists to iterate over loose objects?
>
> There will be more to come: iterating over objects with a prefix, for
> example. In general, this series is taking a layered approach:
>
> - `odb_for_each_object()` is the high-level function that users should
> use if possible. It is part of the ODB layer and abstracts away
> details about the ODB sources.
>
> - `odb_source_for_each_object()` will be introduced in the next patch
> series. It allows the user to take an ODB source and iterate over
> its contained objects, regardless of what the backend is.
>
> - `odb_source_loose_for_each_object()` is the low-level implementation
> for one specific backend. We also have equivalent functions for the
> other backends, like for example for packed objects.
>
> The longer the function name, the more specific the logic becomes. Sure,
> eventually it becomes a mouthful, but ideally users wouldn't have to
> ever interact with the low-level details at all.
Thanks for the extra information, this is definitely what I was missing.
If there are many ways to iterate over objects, then the naming scheme
above makes sense.
The point that I was trying to get across was that I think that the
convention of naming a function that does "foo" to a struct "S" as
"S_foo()" is great, but that we shouldn't apply that convention when
there is only one way to do "foo" in general.
For this particular case, I think I would have pushed back if you said
that `odb_for_each_object()` was the only function that we'd end up with
(i.e., there is no non-ODB way to do this, so for_each_object() is just
as descriptive IMO). But that's not the case, so I think the naming
scheme you have here makes sense.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 05/14] packfile: extract function to iterate through objects of a store
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (3 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-22 1:37 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
` (9 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
In the next commit we're about to introduce a new function that knows how
to iterate through objects of a given packfile store. Same as with the
equivalent function for loose objects, this new function will also be
agnostic of backends by using a `struct object_info`.
Prepare for this by extracting a new shared function to iterate through
a single packfile store.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 78 ++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 45 insertions(+), 33 deletions(-)
diff --git a/packfile.c b/packfile.c
index 79fe64a25b..d15a2ce12b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
+static int packfile_store_for_each_object_internal(struct packfile_store *store,
+ each_packed_object_fn cb,
+ void *data,
+ unsigned flags,
+ int *pack_errors)
{
- struct odb_source *source;
- int r = 0;
- int pack_errors = 0;
+ struct packfile_list_entry *e;
+ int ret = 0;
- odb_prepare_alternates(repo->objects);
+ store->skip_mru_updates = true;
- for (source = repo->objects->sources; source; source = source->next) {
- struct packfile_list_entry *e;
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
- source->packfiles->skip_mru_updates = true;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ *pack_errors = 1;
+ continue;
+ }
- for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
- struct packed_git *p = e->pack;
+ ret = for_each_object_in_pack(p, cb, data, flags);
+ if (ret)
+ break;
+ }
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- pack_errors = 1;
- continue;
- }
+ store->skip_mru_updates = false;
- r = for_each_object_in_pack(p, cb, data, flags);
- if (r)
- break;
- }
+ return ret;
+}
- source->packfiles->skip_mru_updates = false;
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, unsigned flags)
+{
+ struct odb_source *source;
+ int pack_errors = 0;
+ int ret = 0;
- if (r)
+ odb_prepare_alternates(repo->objects);
+
+ for (source = repo->objects->sources; source; source = source->next) {
+ ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
+ flags, &pack_errors);
+ if (ret)
break;
}
- return r ? r : pack_errors;
+ return ret ? ret : pack_errors;
}
static int add_promisor_object(const struct object_id *oid,
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 05/14] packfile: extract function to iterate through objects of a store
2026-01-21 12:50 ` [PATCH v3 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
@ 2026-01-22 1:37 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 1:37 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:21PM +0100, Patrick Steinhardt wrote:
> ---
> packfile.c | 78 ++++++++++++++++++++++++++++++++++++--------------------------
> 1 file changed, 45 insertions(+), 33 deletions(-)
Reading with --color-moved and ignoring space changes makes it clear
that this patch is extracting the logic to iterate through the packfile
store of a single source into its own function, and then calling that
function from within for_each_packed_object().
Seems reasonable.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 06/14] packfile: introduce function to iterate through objects
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (4 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 0:06 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (8 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Introduce a new function `packfile_store_for_each_object()`. This
function is the equivalent to `odb_source_loose_for_each_object()` in
that it:
- Works on a single packfile store and thus per object source.
- Passes a `struct object_info` to the callback function.
As such, it provides the same callback interface as we already provide
for loose objects now. These functions will be used in a subsequent step
to implement `odb_for_each_object()`.
The `for_each_packed_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
packfile.h | 14 ++++++++++++++
2 files changed, 62 insertions(+)
diff --git a/packfile.c b/packfile.c
index d15a2ce12b..cd45c6f21c 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2360,6 +2360,54 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
return ret ? ret : pack_errors;
}
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
+ struct object_info *oi;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t index_pos,
+ void *cb_data)
+{
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+
+ if (data->oi) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
+
+ if (packed_object_info(pack, offset, data->oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
+ }
+
+ return data->cb(oid, data->oi, data->cb_data);
+}
+
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
+ .oi = oi,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+ int pack_errors = 0, ret;
+
+ ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
+ &data, flags, &pack_errors);
+ if (ret)
+ return ret;
+
+ return pack_errors ? -1 : 0;
+}
+
static int add_promisor_object(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos UNUSED,
diff --git a/packfile.h b/packfile.h
index 447c44c4a7..ab0637fbe9 100644
--- a/packfile.h
+++ b/packfile.h
@@ -343,6 +343,20 @@ int for_each_object_in_pack(struct packed_git *p,
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
void *data, unsigned flags);
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
+ * the callback function for each of them. If given, the object info will be
+ * populated with the object's data as if you had called
+ * `packfile_store_read_object_info()` on the object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
#define PACKDIR_FILE_IDX 2
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 06/14] packfile: introduce function to iterate through objects
2026-01-21 12:50 ` [PATCH v3 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-23 0:06 ` Taylor Blau
2026-01-23 9:42 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 0:06 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:22PM +0100, Patrick Steinhardt wrote:
> Introduce a new function `packfile_store_for_each_object()`. This
> function is the equivalent to `odb_source_loose_for_each_object()` in
s/to/of/ ?
> that it:
>
> - Works on a single packfile store and thus per object source.
s/thus per/thus/ ?
> diff --git a/packfile.c b/packfile.c
> index d15a2ce12b..cd45c6f21c 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -2360,6 +2360,54 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
> return ret ? ret : pack_errors;
> }
>
> +struct packfile_store_for_each_object_wrapper_data {
> + struct packfile_store *store;
> + struct object_info *oi;
> + odb_for_each_object_cb cb;
> + void *cb_data;
> +};
> +
> +static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
> + struct packed_git *pack,
> + uint32_t index_pos,
> + void *cb_data)
> +{
> + struct packfile_store_for_each_object_wrapper_data *data = cb_data;
> +
> + if (data->oi) {
Interesting. Is it the case that if the caller provides a non-NULL
pointer to an object_info struct, that we will reuse the request portion
for all iterated objects, updating the response portion as we go along?
If so, I am a little uneasy about the potential for us to mix portions
of the response from an earlier object with a later one. Skimming
packed_object_info(), I don't think that we are in any immediate danger
since it overwrites all fields in the response section. But that feels
somewhat fragile to me, say, if packed_object_info() were to at some
point conditionally assign a field.
I wonder if we should split the request/response sections of object_info
into their own object_info_req and object_info_resp structs. If we did
that, then we could invert the pattern for providing the response,
filling it out ourselves and then passing a pointer to it back to the
caller via the callback function.
TBH, I wonder whether we should push this onto the caller entirely. If
they need to make an object_info request for each object, is there any
cost to having them do that explicitly themselves?
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 06/14] packfile: introduce function to iterate through objects
2026-01-23 0:06 ` Taylor Blau
@ 2026-01-23 9:42 ` Patrick Steinhardt
2026-01-23 9:52 ` Chris Torek
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-23 9:42 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 07:06:09PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:22PM +0100, Patrick Steinhardt wrote:
> > Introduce a new function `packfile_store_for_each_object()`. This
> > function is the equivalent to `odb_source_loose_for_each_object()` in
>
> s/to/of/ ?
Hm, isn't "to" correct in this case? The remainder of the sentence reads
weird though.
> > that it:
> >
> > - Works on a single packfile store and thus per object source.
>
> s/thus per/thus/ ?
I'll also rephrase this a bit.
> > diff --git a/packfile.c b/packfile.c
> > index d15a2ce12b..cd45c6f21c 100644
> > --- a/packfile.c
> > +++ b/packfile.c
> > @@ -2360,6 +2360,54 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
> > return ret ? ret : pack_errors;
> > }
> >
> > +struct packfile_store_for_each_object_wrapper_data {
> > + struct packfile_store *store;
> > + struct object_info *oi;
> > + odb_for_each_object_cb cb;
> > + void *cb_data;
> > +};
> > +
> > +static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
> > + struct packed_git *pack,
> > + uint32_t index_pos,
> > + void *cb_data)
> > +{
> > + struct packfile_store_for_each_object_wrapper_data *data = cb_data;
> > +
> > + if (data->oi) {
>
> Interesting. Is it the case that if the caller provides a non-NULL
> pointer to an object_info struct, that we will reuse the request portion
> for all iterated objects, updating the response portion as we go along?
>
> If so, I am a little uneasy about the potential for us to mix portions
> of the response from an earlier object with a later one. Skimming
> packed_object_info(), I don't think that we are in any immediate danger
> since it overwrites all fields in the response section. But that feels
> somewhat fragile to me, say, if packed_object_info() were to at some
> point conditionally assign a field.
>
> I wonder if we should split the request/response sections of object_info
> into their own object_info_req and object_info_resp structs. If we did
> that, then we could invert the pattern for providing the response,
> filling it out ourselves and then passing a pointer to it back to the
> caller via the callback function.
Yeah, I agree that the current interfaces we have around reading objects
are weird because of the mixed in/out behaviour of `struct object_info`.
I didn't really feel like changing it in this series though because it
would lead to a lot of changes all over the place.
Maybe this is something we can do as a follow-up?
> TBH, I wonder whether we should push this onto the caller entirely. If
> they need to make an object_info request for each object, is there any
> cost to having them do that explicitly themselves?
There is, yeah. The nice thing about combining iteration with the object
info request is that we have more information available when reading the
object info:
- For packfiles we already have the info where exactly the object
sits, so there is no need to do another search for the object.
- For loose objects we already have the path available, even though
this probably doesn't matter too much as the path is trivial to
compute.
For other backends I very much expect that we'll be able to make use of
similar optimizations. For a remote database for example you could craft
the query in such a way that we yield all objects with the exact info
required instead of having to perform a separate query for the object
info.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 06/14] packfile: introduce function to iterate through objects
2026-01-23 9:42 ` Patrick Steinhardt
@ 2026-01-23 9:52 ` Chris Torek
2026-01-23 16:22 ` Junio C Hamano
0 siblings, 1 reply; 120+ messages in thread
From: Chris Torek @ 2026-01-23 9:52 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: Taylor Blau, git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Fri, Jan 23, 2026 at 1:43 AM Patrick Steinhardt <ps@pks.im> wrote:
> On Thu, Jan 22, 2026 at 07:06:09PM -0500, Taylor Blau wrote:
> > On Wed, Jan 21, 2026 at 01:50:22PM +0100, Patrick Steinhardt wrote:
> > > Introduce a new function `packfile_store_for_each_object()`. This
> > > function is the equivalent to `odb_source_loose_for_each_object()` in
> >
> > s/to/of/ ?
>
> Hm, isn't "to" correct in this case? The remainder of the sentence reads
> weird though.
Different English dialects. The preposition after "different" differs...
(It also matters whether you use the definite article, "the function F1
is THE equivalent of F2 in case X" vs "function F1 is equivalent to F2
in case X".)
Indian English uses "a doubt" to mean "a question", which always
drives me up the (a?) wall, but then there are languages without
articles (Chinese and Russian for instance)...
Chris
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 06/14] packfile: introduce function to iterate through objects
2026-01-23 9:52 ` Chris Torek
@ 2026-01-23 16:22 ` Junio C Hamano
2026-01-23 17:45 ` Taylor Blau
0 siblings, 1 reply; 120+ messages in thread
From: Junio C Hamano @ 2026-01-23 16:22 UTC (permalink / raw)
To: Chris Torek
Cc: Patrick Steinhardt, Taylor Blau, git, Karthik Nayak,
Justin Tobler
Chris Torek <chris.torek@gmail.com> writes:
>> > > function is the equivalent to `odb_source_loose_for_each_object()` in
>> >
>> > s/to/of/ ?
>>
>> Hm, isn't "to" correct in this case? The remainder of the sentence reads
>> weird though.
>
> Different English dialects. The preposition after "different" differs...
>
> (It also matters whether you use the definite article, "the function F1
> is THE equivalent of F2 in case X" vs "function F1 is equivalent to F2
> in case X".)
Heh, "equivalent" in "Y is an equivalent of X" is a noun. It is an
adjective in "A is equivalent to B". Of course, an article is used
only with the former (i.e. noun) form, but the article is not the
essential difference; the part of speech is.
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 06/14] packfile: introduce function to iterate through objects
2026-01-23 16:22 ` Junio C Hamano
@ 2026-01-23 17:45 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 17:45 UTC (permalink / raw)
To: Junio C Hamano
Cc: Chris Torek, Patrick Steinhardt, git, Karthik Nayak,
Justin Tobler
On Fri, Jan 23, 2026 at 08:22:48AM -0800, Junio C Hamano wrote:
> Chris Torek <chris.torek@gmail.com> writes:
>
> >> > > function is the equivalent to `odb_source_loose_for_each_object()` in
> >> >
> >> > s/to/of/ ?
> >>
> >> Hm, isn't "to" correct in this case? The remainder of the sentence reads
> >> weird though.
> >
> > Different English dialects. The preposition after "different" differs...
> >
> > (It also matters whether you use the definite article, "the function F1
> > is THE equivalent of F2 in case X" vs "function F1 is equivalent to F2
> > in case X".)
>
> Heh, "equivalent" is "Y is an equivalent of X" is a noun. It is
> adjective in "A is equivalent to B". Of course, article is used
> only with the former (i.e. noun) form, but article is not the
> essential difference, parts of speech is.
An alternative suggestion would be s/the //, making this read:
This function is equivalent to `odb_source_loose_for_each_object()`
in that it [...]
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 07/14] odb: introduce `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (5 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 0:13 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
` (7 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Introduce a new function `odb_for_each_object()` that knows how to iterate
through all objects that are part of a given object database. This function
is essentially a simple wrapper around the object database sources.
Subsequent commits will adapt callers to use this new function.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
odb.c | 27 +++++++++++++++++++++++++++
odb.h | 17 +++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/odb.c b/odb.c
index ac70b6a099..65f0447aa5 100644
--- a/odb.c
+++ b/odb.c
@@ -995,6 +995,33 @@ int odb_freshen_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ int ret;
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ continue;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
diff --git a/odb.h b/odb.h
index f97f249580..8a37fe08e0 100644
--- a/odb.h
+++ b/odb.h
@@ -475,6 +475,23 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
struct object_info *oi,
void *cb_data);
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ *
+ * Returning a non-zero error code from the callback function will cause
+ * iteration to abort. The error code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
+ struct object_info *oi,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 07/14] odb: introduce `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-23 0:13 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 0:13 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:23PM +0100, Patrick Steinhardt wrote:
> Introduce a new function `odb_for_each_object()` that knows to iterate
> through all objects part of a given object database. This function is
> essentially a simple wrapper around the object database sources.
>
> Subsequent commits will adapt callers to use this new function.
Makes sense.
> +int odb_for_each_object(struct object_database *odb,
> + struct object_info *oi,
> + odb_for_each_object_cb cb,
> + void *cb_data,
> + unsigned flags)
> +{
> + int ret;
> +
> + odb_prepare_alternates(odb);
> + for (struct odb_source *source = odb->sources; source; source = source->next) {
> + if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
> + continue;
> +
> + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
> + ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
> + if (ret)
> + return ret;
> + }
Having the bits corresponding to these two flags be set means that we
can avoid looking into the source entirely, as we know ahead of time
that none of its objects would match the caller's criteria.
> + ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
...but when we *do* need to iterate through an individual source, we
pass the flags down to that source which handles the rest of them. Good.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (6 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 0:32 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
` (6 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
In git-fsck(1) we have two callsites where we iterate over all objects
via `for_each_loose_object()` and `for_each_packed_object()`. Both of
these are trivially convertible to `odb_for_each_object()`.
Refactor these callsites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/fsck.c | 57 ++++++++++++---------------------------------------------
1 file changed, 12 insertions(+), 45 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4979bc795e..96107695ae 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
return 0;
}
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
+ struct object_info *io UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
- return; /* not part of our original set */
+ return 0; /* not part of our original set */
if (obj->flags & REACHABLE)
- return; /* reachable objects already traversed */
+ return 0; /* reachable objects already traversed */
/*
* Avoid passing OBJ_NONE to fsck_walk, which will parse the object
@@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
-}
-static int mark_loose_unreachable_referents(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
- return 0;
-}
-
-static int mark_packed_unreachable_referents(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
return 0;
}
@@ -394,12 +381,8 @@ static void check_connectivity(void)
* and ignore any that weren't present in our earlier
* traversal.
*/
- for_each_loose_object(the_repository->objects,
- mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_unreachable_referents,
- NULL,
- 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_unreachable_referents, NULL, 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
fsck_resolve_undo(istate, index_path);
}
-static void mark_object_for_connectivity(const struct object_id *oid)
+static int mark_object_for_connectivity(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data UNUSED)
{
struct object *obj = lookup_unknown_object(the_repository, oid);
obj->flags |= HAS_OBJ;
-}
-
-static int mark_loose_for_connectivity(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
- return 0;
-}
-
-static int mark_packed_for_connectivity(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
return 0;
}
@@ -1001,10 +970,8 @@ int cmd_fsck(int argc,
fsck_refs(the_repository);
if (connectivity_only) {
- for_each_loose_object(the_repository->objects,
- mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_for_connectivity, NULL, 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_object_for_connectivity, NULL, 0);
} else {
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next)
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-23 0:32 ` Taylor Blau
2026-01-23 9:42 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 0:32 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:24PM +0100, Patrick Steinhardt wrote:
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/fsck.c | 57 ++++++++++++---------------------------------------------
> 1 file changed, 12 insertions(+), 45 deletions(-)
This patch was a really pleasant read. It's really great to see both
pairs of functions collapse into a single one that acts the same over
loose/packed objects as opposed to two functions which do the same thing
but need to have different signatures to be able to plug into the
iterators for loose vs. packed objects.
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index 4979bc795e..96107695ae 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
> return 0;
> }
>
> -static void mark_unreachable_referents(const struct object_id *oid)
> +static int mark_unreachable_referents(const struct object_id *oid,
> + struct object_info *io UNUSED,
s/io/oi/ ?
> + void *data UNUSED)
> {
The transformation here makes sense (and I think aloud through the
similar mark_object_for_connectivity() transformation below). One
thought that I had while reading, though, was how this function behaves
when it is passed the same object more than once, since you mention that
as a possibility in the commit which introduces odb_for_each_object().
I think that this is OK, since we already likely send the same object to
this function multiple times if, e.g., we freshened an object from the
cruft pack, in which case we'd see it both when iterating packed
objects as well as when iterating loose ones.
As far as I can tell, that's OK, but it might be nice to provide a brief
analysis of that in the commit message, just to be sure and to help
future readers.
> @@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
> fsck_resolve_undo(istate, index_path);
> }
>
> -static void mark_object_for_connectivity(const struct object_id *oid)
> +static int mark_object_for_connectivity(const struct object_id *oid,
> + struct object_info *oi UNUSED,
> + void *cb_data UNUSED)
> {
> struct object *obj = lookup_unknown_object(the_repository, oid);
> obj->flags |= HAS_OBJ;
> -}
> -
> -static int mark_loose_for_connectivity(const struct object_id *oid,
> - const char *path UNUSED,
> - void *data UNUSED)
> -{
> - mark_object_for_connectivity(oid);
> - return 0;
> -}
> -
> -static int mark_packed_for_connectivity(const struct object_id *oid,
> - struct packed_git *pack UNUSED,
> - uint32_t pos UNUSED,
> - void *data UNUSED)
> -{
> - mark_object_for_connectivity(oid);
> return 0;
> }
This is really nice, and everything here makes sense. Both of the old
callback functions merely call mark_object_for_connectivity() but are
different in order to accommodate the different function signatures
required. The new function uses the common interface and does the exact
same thing. Looking great.
> @@ -1001,10 +970,8 @@ int cmd_fsck(int argc,
> fsck_refs(the_repository);
>
> if (connectivity_only) {
> - for_each_loose_object(the_repository->objects,
> - mark_loose_for_connectivity, NULL, 0);
> - for_each_packed_object(the_repository,
> - mark_packed_for_connectivity, NULL, 0);
> + odb_for_each_object(the_repository->objects, NULL,
> + mark_object_for_connectivity, NULL, 0);
Makes sense.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-23 0:32 ` Taylor Blau
@ 2026-01-23 9:42 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-23 9:42 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 07:32:32PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:24PM +0100, Patrick Steinhardt wrote:
> > diff --git a/builtin/fsck.c b/builtin/fsck.c
> > index 4979bc795e..96107695ae 100644
> > --- a/builtin/fsck.c
> > +++ b/builtin/fsck.c
> > @@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
> > return 0;
> > }
> >
> > -static void mark_unreachable_referents(const struct object_id *oid)
> > +static int mark_unreachable_referents(const struct object_id *oid,
> > + struct object_info *io UNUSED,
>
> s/io/oi/ ?
Well spotted.
> > + void *data UNUSED)
> > {
>
> The transformation here makes sense (and I think aloud through the
> similar mark_object_for_connectivity() transformation below). One
> thought that I had while reading, though, was how this function behaves
> when it is passed the same object more than once, since you mention that
> as a possibility in the commit which introduces odb_for_each_object().
>
> I think that this is OK, since we already likely send the same object to
> this function multiple times if, e.g., we freshened an object from the
> cruft pack, in which case we'd see it both when iterating packed
> objects as well as when iterating loose ones.
>
> As far as I can tell, that's OK, but it might be nice to provide a brief
> analysis of that in the commit message, just to be sure and to help
> future readers.
Fair point, will mention.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 09/14] treewide: enumerate promisor objects via `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (7 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 0:33 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
` (5 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have multiple callsites where we enumerate all promisor objects in
the object database via `for_each_packed_object()`. This is done by
passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to
skip over all non-promisor objects.
These callsites can be trivially converted to `odb_for_each_object()` as
we know to skip enumeration of loose objects in case the `PROMISOR_ONLY`
flag was passed by the caller.
Refactor the sites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 37 ++++++++++++++++++++++---------------
repack-promisor.c | 8 ++++----
revision.c | 10 ++++------
3 files changed, 30 insertions(+), 25 deletions(-)
diff --git a/packfile.c b/packfile.c
index cd45c6f21c..4f84bc19d9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2408,28 +2408,32 @@ int packfile_store_for_each_object(struct packfile_store *store,
return pack_errors ? -1 : 0;
}
+struct add_promisor_object_data {
+ struct repository *repo;
+ struct oidset *set;
+};
+
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos UNUSED,
- void *set_)
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
- struct oidset *set = set_;
+ struct add_promisor_object_data *data = cb_data;
struct object *obj;
int we_parsed_object;
- obj = lookup_object(pack->repo, oid);
+ obj = lookup_object(data->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object_with_flags(pack->repo, oid,
+ obj = parse_object_with_flags(data->repo, oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
}
if (!obj)
return 1;
- oidset_insert(set, oid);
+ oidset_insert(data->set, oid);
/*
* If this is a tree, commit, or tag, the objects it refers
@@ -2447,19 +2451,19 @@ static int add_promisor_object(const struct object_id *oid,
*/
return 0;
while (tree_entry_gently(&desc, &entry))
- oidset_insert(set, &entry.oid);
+ oidset_insert(data->set, &entry.oid);
if (we_parsed_object)
free_tree_buffer(tree);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, get_commit_tree_oid(commit));
+ oidset_insert(data->set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
- oidset_insert(set, &parents->item->object.oid);
+ oidset_insert(data->set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
- oidset_insert(set, get_tagged_oid(tag));
+ oidset_insert(data->set, get_tagged_oid(tag));
}
return 0;
}
@@ -2471,10 +2475,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(r)) {
- for_each_packed_object(r, add_promisor_object,
- &promisor_objects,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+ struct add_promisor_object_data data = {
+ .repo = r,
+ .set = &promisor_objects,
+ };
+
+ odb_for_each_object(r->objects, NULL, add_promisor_object, &data,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/repack-promisor.c b/repack-promisor.c
index 45c330b9a5..35c4073632 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -17,8 +17,8 @@ struct write_oid_context {
* necessary.
*/
static int write_oid(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED, void *data)
+ struct object_info *oi UNUSED,
+ void *data)
{
struct write_oid_context *ctx = data;
struct child_process *cmd = ctx->cmd;
@@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo,
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
- for_each_packed_object(repo, write_oid, &ctx,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
+ odb_for_each_object(repo->objects, NULL, write_oid, &ctx,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index 5aadf46dac..e34bcd8e88 100644
--- a/revision.c
+++ b/revision.c
@@ -3626,8 +3626,7 @@ void reset_revision_walk(void)
}
static int mark_uninteresting(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
+ struct object_info *oi UNUSED,
void *cb)
{
struct rev_info *revs = cb;
@@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs)
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
- if (revs->exclude_promisor_objects) {
- for_each_packed_object(revs->repo, mark_uninteresting, revs,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
- }
+ if (revs->exclude_promisor_objects)
+ odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting,
+ revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 09/14] treewide: enumerate promisor objects via `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-23 0:33 ` Taylor Blau
0 siblings, 0 replies; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 0:33 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:25PM +0100, Patrick Steinhardt wrote:
> ---
> packfile.c | 37 ++++++++++++++++++++++---------------
> repack-promisor.c | 8 ++++----
> revision.c | 10 ++++------
> 3 files changed, 30 insertions(+), 25 deletions(-)
All looks very sensible. Thanks for structuring the series in the way
that you did, it's very easy to follow these conversions and see that
they were done correctly.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (8 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 0:46 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
` (4 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We're using `for_each_loose_object()` and `for_each_packed_object()` at
a couple of callsites to enumerate all loose and packed objects,
respectively. These functions will be removed in a subsequent commit in
favor of the newly introduced `odb_source_loose_for_each_object()` and
`packfile_store_for_each_object()` replacements.
Prepare for this by refactoring the sites accordingly.
Note that ideally, we'd convert all callsites to use the generic
`odb_for_each_object()` function already. But for some callers this is
not possible (yet), and it would require some significant refactorings
to make this work. Converting these sites will thus be deferred to a
later patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 28 ++++++++++++++++++++++------
commit-graph.c | 44 +++++++++++++++++++++++++++++++-------------
2 files changed, 53 insertions(+), 19 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 6964a5a52c..7d16fbc1b8 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -806,11 +806,14 @@ struct for_each_object_payload {
void *payload;
};
-static int batch_one_object_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *_payload)
+static int batch_one_object_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *_payload)
{
struct for_each_object_payload *payload = _payload;
+ if (oi && oi->whence == OI_PACKED)
+ return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
+ payload->payload);
return payload->callback(oid, NULL, 0, payload->payload);
}
@@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt,
.payload = _payload,
};
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
+ &payload, flags);
+ if (ret)
+ break;
+ }
if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
batch_one_object_bitmapped, &payload)) {
@@ -861,8 +871,14 @@ static void batch_each_object(struct batch_options *opt,
&payload, flags);
}
} else {
- for_each_packed_object(the_repository, batch_one_object_packed,
- &payload, flags);
+ struct object_info oi = { 0 };
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = packfile_store_for_each_object(source->packfiles, &oi,
+ batch_one_object_oi, &payload, flags);
+ if (ret)
+ break;
+ }
}
free_bitmap_index(bitmap);
diff --git a/commit-graph.c b/commit-graph.c
index 7f1145a082..a3087d7883 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
+static int add_packed_commits_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *data)
+{
+ struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
+
+ if (ctx->progress)
+ display_progress(ctx->progress, ++ctx->progress_done);
+
+ if (*oi->typep != OBJ_COMMIT)
+ return 0;
+
+ oid_array_append(&ctx->oids, oid);
+ set_commit_pos(ctx->r, oid);
+
+ return 0;
+}
+
static int add_packed_commits(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
- struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
enum object_type type;
off_t offset = nth_packed_object_offset(pack, pos);
struct object_info oi = OBJECT_INFO_INIT;
- if (ctx->progress)
- display_progress(ctx->progress, ++ctx->progress_done);
-
oi.typep = &type;
if (packed_object_info(pack, offset, &oi) < 0)
die(_("unable to get type of object %s"), oid_to_hex(oid));
- if (type != OBJ_COMMIT)
- return 0;
-
- oid_array_append(&ctx->oids, oid);
- set_commit_pos(ctx->r, oid);
-
- return 0;
+ return add_packed_commits_oi(oid, &oi, data);
}
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
@@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
+ struct odb_source *source;
+ enum object_type type;
+ struct object_info oi = {
+ .typep = &type,
+ };
+
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
ctx->r,
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(ctx->r, add_packed_commits, ctx,
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
+ odb_prepare_alternates(ctx->r->objects);
+ for (source = ctx->r->objects->sources; source; source = source->next)
+ packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi,
+ ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-21 12:50 ` [PATCH v3 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
@ 2026-01-23 0:46 ` Taylor Blau
2026-01-23 9:43 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 0:46 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:26PM +0100, Patrick Steinhardt wrote:
> We're using `for_each_loose_object()` and `for_each_packed_object()` at
> a couple of callsites to enumerate all loose and packed objects,
> respectively. These functions will be removed in a subsequent commit in
> favor of the newly introduced `odb_source_loose_for_each_object()` and
> `packfile_store_for_each_object()` replacements.
>
> Prepare for this by refactoring the sites accordingly.
>
> Note that ideally, we'd convert all callsites to use the generic
> `odb_for_each_object()` function already. But for some callers this is
> not possible (yet), and it would require some significant refactorings
> to make this work. Converting these sites will thus be deferred to a
> later patch series.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/cat-file.c | 28 ++++++++++++++++++++++------
> commit-graph.c | 44 +++++++++++++++++++++++++++++++-------------
> 2 files changed, 53 insertions(+), 19 deletions(-)
>
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index 6964a5a52c..7d16fbc1b8 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -806,11 +806,14 @@ struct for_each_object_payload {
> void *payload;
> };
>
> -static int batch_one_object_loose(const struct object_id *oid,
> - const char *path UNUSED,
> - void *_payload)
> +static int batch_one_object_oi(const struct object_id *oid,
> + struct object_info *oi,
> + void *_payload)
> {
> struct for_each_object_payload *payload = _payload;
> + if (oi && oi->whence == OI_PACKED)
> + return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
Ah, here's a good argument for having the API provide the caller with
the object_info response it requested. Obviously the packfile_store
knows which packfile it's looking at, so asking the caller to
re-discover the same information is wasteful.
That said, I'm still a little leery of the way we're passing that
information around for the same reasons as I shared earlier in the
thread, but I definitely can see the motivation.
> @@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt,
> .payload = _payload,
> };
> struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
> + struct odb_source *source;
>
> - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
> + odb_prepare_alternates(the_repository->objects);
> + for (source = the_repository->objects->sources; source; source = source->next) {
> + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
> + &payload, flags);
> + if (ret)
> + break;
> + }
OK, I'm guessing that this is one such case where we can't yet use
odb_for_each_object() function directly because of the refactoring which
you alluded to in the commit message. That seems reasonable, though I
wonder if it's worth adding a /* TODO */ comment here to that effect.
Just out of curiosity, what does that refactoring entail? I'm curious
because I wonder whether the caller is just written in such a way that
it makes it hard to immediately plug into the new API, or whether there
are more fundamental issues at play that make the refactoring less than
straightforward. If the latter, those could potentially help inform the
direction here.
(To be clear, I figure that this is likely work that you have already
done, I'm just curious to see if the details would yield any benefit to
the immediate patch series under discussion.)
> diff --git a/commit-graph.c b/commit-graph.c
> index 7f1145a082..a3087d7883 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
The conversion here all looks great to me.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-23 0:46 ` Taylor Blau
@ 2026-01-23 9:43 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-23 9:43 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 07:46:04PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:26PM +0100, Patrick Steinhardt wrote:
> > diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> > index 6964a5a52c..7d16fbc1b8 100644
> > --- a/builtin/cat-file.c
> > +++ b/builtin/cat-file.c
> > @@ -846,8 +849,15 @@ static void batch_each_object(struct batch_options *opt,
> > .payload = _payload,
> > };
> > struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
> > + struct odb_source *source;
> >
> > - for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
> > + odb_prepare_alternates(the_repository->objects);
> > + for (source = the_repository->objects->sources; source; source = source->next) {
> > + int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
> > + &payload, flags);
> > + if (ret)
> > + break;
> > + }
>
> OK, I'm guessing that this is one such case where we can't yet use
> odb_for_each_object() function directly because of the refactoring which
> you alluded to in the commit message. That seems reasonable, though I
> wonder if it's worth adding a /* TODO */ comment here to that effect.
Sure, I can add a comment.
> Just out of curiosity, what does that refactoring entail? I'm curious
> because I wonder whether the caller is just written in such a way that
> it makes it hard to immediately plug into the new API, or whether there
> are more fundamental issues at play that make the refactoring less than
> straightforward. If the latter, those could potentially help inform the
> direction here.
>
> (To be clear, I figure that this is likely work that you have already
> done, I'm just curious to see if the details would yield any benefit to
> the immediate patch series under discussion.)
What this code here intends to do is to filter objects via an object
filter (e.g. "--filter=blobs:none"). The way I intend to introduce this
functionality in a subsequent series is to introduce a `struct
odb_for_each_object_options` that contains optional parameters:
- An object ID prefix that can be used to iterate over all objects
that have a certain matching prefix. This will be used for example
in "object-name.c".
- An object filter that can be used to filter objects like we do here.
- Potentially more things that I haven't discovered yet?
Once we have that, the filtering can then happen on the source level.
For the packfile store it would mean that we can try to filter via the
bitmap, if available, and that would allow us to move the logic that we
have here into the backend.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 11/14] odb: introduce mtime fields for object info requests
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (9 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 1:06 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
` (3 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
There are some use cases where we need to figure out the mtime for
objects. Most importantly, this is the case when we want to prune
unreachable objects. But getting at that data requires users to manually
derive the info either via the loose object's mtime, the packfiles'
mtime or via the ".mtimes" file.
Introduce a new `struct object_info::mtimep` pointer that allows callers
to request an object's mtime. This new field will be used in a
subsequent commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 29 +++++++++++++++++++++++++----
odb.c | 2 ++
odb.h | 1 +
packfile.c | 40 +++++++++++++++++++++++++++++++++-------
4 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/object-file.c b/object-file.c
index 65e730684b..c0f896673b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
+ struct stat st;
/*
* If we don't care about type or size, then we don't
@@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source,
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
struct stat st;
- if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
+ if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
goto out;
}
@@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- if (oi && oi->disk_sizep)
- *oi->disk_sizep = st.st_size;
+ if (oi) {
+ if (oi->disk_sizep)
+ *oi->disk_sizep = st.st_size;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
+ }
ret = 0;
goto out;
@@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- map = map_fd(fd, path, &mapsize);
+ if (fstat(fd, &st)) {
+ close(fd);
+ ret = -1;
+ goto out;
+ }
+
+ mapsize = xsize_t(st.st_size);
+ if (!mapsize) {
+ close(fd);
+ ret = error(_("object file %s is empty"), path);
+ goto out;
+ }
+
+ map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
if (!map) {
ret = -1;
goto out;
@@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source,
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
stream_to_end = &stream;
diff --git a/odb.c b/odb.c
index 65f0447aa5..67decd3908 100644
--- a/odb.c
+++ b/odb.c
@@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
+ if (oi->mtimep)
+ *oi->mtimep = 0;
oi->whence = OI_CACHED;
}
return 0;
diff --git a/odb.h b/odb.h
index 8a37fe08e0..68336d2730 100644
--- a/odb.h
+++ b/odb.h
@@ -317,6 +317,7 @@ struct object_info {
off_t *disk_sizep;
struct object_id *delta_base_oid;
void **contentp;
+ time_t *mtimep;
/* Response */
enum {
diff --git a/packfile.c b/packfile.c
index 4f84bc19d9..c96ec21f86 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
-int packed_object_info(struct packed_git *p,
- off_t obj_offset, struct object_info *oi)
+static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset,
+ uint32_t *maybe_index_pos, struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type = OBJ_NONE;
+ uint32_t pack_pos;
int ret;
/*
@@ -1619,16 +1620,34 @@ int packed_object_info(struct packed_git *p,
}
}
- if (oi->disk_sizep) {
- uint32_t pos;
- if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
ret = -1;
goto out;
}
+ }
+
+ if (oi->disk_sizep)
+ *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+
+ if (oi->mtimep) {
+ if (p->is_cruft) {
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
+ die(_("could not load cruft pack .mtimes"));
+
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
+ else
+ index_pos = pack_pos_to_index(p, pack_pos);
- *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
+ *oi->mtimep = nth_packed_mtime(p, index_pos);
+ } else {
+ *oi->mtimep = p->mtime;
+ }
}
if (oi->typep) {
@@ -1681,6 +1700,12 @@ int packed_object_info(struct packed_git *p,
return ret;
}
+int packed_object_info(struct packed_git *p, off_t obj_offset,
+ struct object_info *oi)
+{
+ return packed_object_info_with_index_pos(p, obj_offset, NULL, oi);
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2377,7 +2402,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
if (data->oi) {
off_t offset = nth_packed_object_offset(pack, index_pos);
- if (packed_object_info(pack, offset, data->oi) < 0) {
+ if (packed_object_info_with_index_pos(pack, offset,
+ &index_pos, data->oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 11/14] odb: introduce mtime fields for object info requests
2026-01-21 12:50 ` [PATCH v3 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
@ 2026-01-23 1:06 ` Taylor Blau
2026-01-23 9:43 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 1:06 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:27PM +0100, Patrick Steinhardt wrote:
> There are some use cases where we need to figure out the mtime for
> objects. Most importantly, this is the case when we want to prune
> unreachable objects. But getting at that data requires users to manually
> derive the info either via the loose object's mtime, the packfiles'
> mtime or via the ".mtimes" file.
>
> Introduce a new `struct object_info::mtimep` pointer that allows callers
> to request an object's mtime. This new field will be used in a
> subsequent commit.
The goal seems reasonable to me, but I am a little unsure about whether
or not this is the right place to expose this information. I have some
more thoughts below...
> diff --git a/object-file.c b/object-file.c
> index 65e730684b..c0f896673b 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
> char hdr[MAX_HEADER_LEN];
> unsigned long size_scratch;
> enum object_type type_scratch;
> + struct stat st;
I was a little confused why we were declaring a stat struct here...
> /*
> * If we don't care about type or size, then we don't
> @@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source,
> if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
> struct stat st;
>
> - if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
> + if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
> ret = quick_has_loose(source->loose, oid) ? 0 : -1;
> goto out;
> }
> @@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source,
> goto out;
> }
>
> - if (oi && oi->disk_sizep)
> - *oi->disk_sizep = st.st_size;
> + if (oi) {
> + if (oi->disk_sizep)
> + *oi->disk_sizep = st.st_size;
...and then assigning it here without actually calling lstat() between
the two. But the diff context elides the fact that there is another stat
declaration within this block that we *do* lstat() into before reading
it.
That tripped me up a little while reviewing, but not a huge deal. I do
wonder whether or not there is a clearer way to structure all of these
conditionals. I *think* that what you wrote here is right, but the way
that it has grown organically over time (to be clear, not the fault of
your series) makes it a little difficult to follow.
> + if (oi->mtimep)
> + *oi->mtimep = st.st_mtime;
> + }
>
> ret = 0;
> goto out;
> @@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source,
> goto out;
> }
>
> - map = map_fd(fd, path, &mapsize);
> + if (fstat(fd, &st)) {
> + close(fd);
> + ret = -1;
> + goto out;
> + }
Makes sense. We were previously letting map_fd() take care of stat()-ing
the file to know how large the mmap should be, but now we might need
that information for the mtime as well. So doing what map_fd() is doing
underneath here directly makes sense.
> diff --git a/odb.c b/odb.c
> index 65f0447aa5..67decd3908 100644
> --- a/odb.c
> +++ b/odb.c
> @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
> oidclr(oi->delta_base_oid, odb->repo->hash_algo);
> if (oi->contentp)
> *oi->contentp = xmemdupz(co->buf, co->size);
> + if (oi->mtimep)
> + *oi->mtimep = 0;
Assuming that you do not change the object_info request/response
semantics, I wonder if it might make sense to zero out the entirety of
the response section as a belt-and-suspenders mechanism in case future
contributors forget to assign zero to the new fields themselves.
> @@ -1619,16 +1620,34 @@ int packed_object_info(struct packed_git *p,
> }
> }
>
> - if (oi->disk_sizep) {
> - uint32_t pos;
> - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
> + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
> + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
> error("could not find object at offset %"PRIuMAX" "
> "in pack %s", (uintmax_t)obj_offset, p->pack_name);
> ret = -1;
> goto out;
> }
> + }
> +
> + if (oi->disk_sizep)
> + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
> +
> + if (oi->mtimep) {
> + if (p->is_cruft) {
> + uint32_t index_pos;
> +
> + if (load_pack_mtimes(p) < 0)
> + die(_("could not load cruft pack .mtimes"));
Do you think it would be worth doing instead:
die(_("could not load .mtimes for cruft pack '%s'"), pack_basename(p));
? Most repositories should only ever have one cruft pack in practice
(even so, there should still be some value in identifying it by its
checksum in case someone is repacking underneath us). But some
repositories will have >1 cruft pack, so knowing which one is busted may
be useful in that case.
> +
> + if (maybe_index_pos)
> + index_pos = *maybe_index_pos;
> + else
> + index_pos = pack_pos_to_index(p, pack_pos);
>
> - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
> + *oi->mtimep = nth_packed_mtime(p, index_pos);
> + } else {
> + *oi->mtimep = p->mtime;
> + }
I am a little stuck here on whether or not this is the right layer to
determine an object's mtime. On the one hand, it makes sense to me that
callers would want to know the mtime of an object, either by the mtime
of the loose object on disk, or the mtime of the containing pack otherwise.
But I'm not sure whether the GC-specific definition of "mtime" is what
the caller would always want. For GC uses, yes, having mtime be aware of
cruft packs makes total sense to me. But for non-GC uses, would there
ever be a scenario where the caller would want to know the mtime of an
object's containing pack, regardless of whether or not that pack is
cruft?
I suppose they could get around that today by doing something like:
if (oi->whence == OI_PACKED) {
struct packed_git *p = oi->u.packed.p;
if (p->is_cruft) {
/* reinterpret the meaning of mtime... */
*oi->mtimep = p->mtime;
}
}
, but that feels a little clunky. I dunno, maybe this hypothetical
doesn't really exist and I'm overthinking this. But I have this nagging
feeling that we are exposing this information at too low of a level as
to make the object store aware of cruft pack/GC-specific mechanics.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 11/14] odb: introduce mtime fields for object info requests
2026-01-23 1:06 ` Taylor Blau
@ 2026-01-23 9:43 ` Patrick Steinhardt
2026-01-23 17:48 ` Taylor Blau
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-23 9:43 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 08:06:40PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:27PM +0100, Patrick Steinhardt wrote:
> > There are some use cases where we need to figure out the mtime for
> > objects. Most importantly, this is the case when we want to prune
> > unreachable objects. But getting at that data requires users to manually
> > derive the info either via the loose object's mtime, the packfiles'
> > mtime or via the ".mtimes" file.
> >
> > Introduce a new `struct object_info::mtimep` pointer that allows callers
> > to request an object's mtime. This new field will be used in a
> > subsequent commit.
>
> The goal seems reasonable to me, but I am a little unsure about whether
> or not this is the right place to expose this information. I have some
> more thoughts below...
>
> > diff --git a/odb.c b/odb.c
> > index 65f0447aa5..67decd3908 100644
> > --- a/odb.c
> > +++ b/odb.c
> > @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
> > oidclr(oi->delta_base_oid, odb->repo->hash_algo);
> > if (oi->contentp)
> > *oi->contentp = xmemdupz(co->buf, co->size);
> > + if (oi->mtimep)
> > + *oi->mtimep = 0;
>
> Assuming that you do not change the object_info request/response
> semantics, I wonder if it might make sense to zero out the entirety of
> the response section as a belt-and-suspenders mechanism in case future
> contributors forget to assign zero to the new fields themselves.
Splitting up the request/response structure as you proposed in a
previous patch could definitely help with this. I'd rather do such a
bigger change as a follow-up, though, as it would lead to a lot of
churn.
> > @@ -1619,16 +1620,34 @@ int packed_object_info(struct packed_git *p,
> > }
> > }
> >
> > - if (oi->disk_sizep) {
> > - uint32_t pos;
> > - if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
> > + if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
> > + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
> > error("could not find object at offset %"PRIuMAX" "
> > "in pack %s", (uintmax_t)obj_offset, p->pack_name);
> > ret = -1;
> > goto out;
> > }
> > + }
> > +
> > + if (oi->disk_sizep)
> > + *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
> > +
> > + if (oi->mtimep) {
> > + if (p->is_cruft) {
> > + uint32_t index_pos;
> > +
> > + if (load_pack_mtimes(p) < 0)
> > + die(_("could not load cruft pack .mtimes"));
>
> Do you think it would be worth doing instead:
>
> die(_("could not load .mtimes for cruft pack '%s'"), pack_basename(p));
>
> ? Most repositories should only ever have one cruft pack in practice
> (even so, there should still be some value in identifying it by its
> checksum in case someone is repacking underneath us). But some
> repositories will have >1 cruft pack, so knowing which one is busted may
> be useful in that case.
Yup, makes sense.
> > +
> > + if (maybe_index_pos)
> > + index_pos = *maybe_index_pos;
> > + else
> > + index_pos = pack_pos_to_index(p, pack_pos);
> >
> > - *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
> > + *oi->mtimep = nth_packed_mtime(p, index_pos);
> > + } else {
> > + *oi->mtimep = p->mtime;
> > + }
>
> I am a little stuck here on whether or not this is the right layer to
> determine an object's mtime. On the one hand, it makes sense to me that
> callers would want to know the mtime of an object, either by the mtime
> of the loose object on disk, or the mtime of the containing pack otherwise.
>
> But I'm not sure whether the GC-specific definition of "mtime" is what
> the caller would always want. For GC uses, yes, having mtime be aware of
> cruft packs makes total sense to me. But for non-GC uses, would there
> ever be a scenario where the caller would want to know the mtime of an
> object's containing pack, regardless of whether or not that pack is
> cruft?
>
> I suppose they could get around that today by doing something like:
>
> if (oi->whence == OI_PACKED) {
> struct packed_git *p = oi->u.packed.p;
> if (p->is_cruft) {
> /* reinterpret the meaning of mtime... */
> *oi->mtimep = p->mtime;
> }
> }
>
> , but that feels a little clunky. I dunno, maybe this hypothetical
> doesn't really exist and I'm overthinking this. But I have this nagging
> feeling that we are exposing this information at too low of a level as
> to make the object store aware of cruft pack/GC-specific mechanics.
I'll answer on your next mail, where you also talk about this.
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 11/14] odb: introduce mtime fields for object info requests
2026-01-23 9:43 ` Patrick Steinhardt
@ 2026-01-23 17:48 ` Taylor Blau
2026-01-26 8:53 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 17:48 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Fri, Jan 23, 2026 at 10:43:07AM +0100, Patrick Steinhardt wrote:
> > > diff --git a/odb.c b/odb.c
> > > index 65f0447aa5..67decd3908 100644
> > > --- a/odb.c
> > > +++ b/odb.c
> > > @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
> > > oidclr(oi->delta_base_oid, odb->repo->hash_algo);
> > > if (oi->contentp)
> > > *oi->contentp = xmemdupz(co->buf, co->size);
> > > + if (oi->mtimep)
> > > + *oi->mtimep = 0;
> >
> > Assuming that you do not change the object_info request/response
> > semantics, I wonder if it might make sense to zero out the entirety of
> > the response section as a belt-and-suspenders mechanism in case future
> > contributors forget to assign zero to the new fields themselves.
>
> Splitting up the request/response structure as you proposed in a
> previous patch could definitely help with this. I'd prefer to rather do
> such a bigger change as a follow-up though as it would lead to a lot of
> churn.
I'm OK with pushing the larger change down the road, but I am a little
uncomfortable with the interim state being introduced here. Perhaps a
compromise here would be to have the caller supply a pointer to an
object_info struct, whose request fields we honor. The response fields
would then be written into a separate object_info struct via an
out-parameter.
I don't know. I think that ^ this suggestion is kind of ugly, but I'm
trying to come up with something that doesn't introduce the risk I
described above in the interim between this patch series and the one
you're proposing later on.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 11/14] odb: introduce mtime fields for object info requests
2026-01-23 17:48 ` Taylor Blau
@ 2026-01-26 8:53 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 8:53 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Fri, Jan 23, 2026 at 12:48:32PM -0500, Taylor Blau wrote:
> On Fri, Jan 23, 2026 at 10:43:07AM +0100, Patrick Steinhardt wrote:
> > > > diff --git a/odb.c b/odb.c
> > > > index 65f0447aa5..67decd3908 100644
> > > > --- a/odb.c
> > > > +++ b/odb.c
> > > > @@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
> > > > oidclr(oi->delta_base_oid, odb->repo->hash_algo);
> > > > if (oi->contentp)
> > > > *oi->contentp = xmemdupz(co->buf, co->size);
> > > > + if (oi->mtimep)
> > > > + *oi->mtimep = 0;
> > >
> > > Assuming that you do not change the object_info request/response
> > > semantics, I wonder if it might make sense to zero out the entirety of
> > > the response section as a belt-and-suspenders mechanism in case future
> > > contributors forget to assign zero to the new fields themselves.
> >
> > Splitting up the request/response structure as you proposed in a
> > previous patch could definitely help with this. I'd prefer to rather do
> > such a bigger change as a follow-up though as it would lead to a lot of
> > churn.
>
> I'm OK with pushing the larger change down the road, but I am a little
> uncomfortable with the interim state being introduced here. Perhaps a
> compromise here would be to have the caller supply a pointer to an
> object_info struct, whose request fields we honor. The response fields
> would then be written into a separate object_info struct via an
> out-parameter.
>
> I don't know. I think that ^ this suggestion is kind of ugly, but I'm
> trying to come up with something that doesn't introduce the risk I
> described above in the interim between this patch series and the one
> you're proposing later on.
I think it's actually not _that_ ugly, and I like the additional safety
that it brings us.
Thanks!
Patrick
diff --git a/object-file.c b/object-file.c
index bc5209f2fe..6785821c8c 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1804,7 +1804,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
struct for_each_object_wrapper_data {
struct odb_source *source;
- struct object_info *oi;
+ const struct object_info *request;
odb_for_each_object_cb cb;
void *cb_data;
};
@@ -1814,21 +1814,28 @@ static int for_each_object_wrapper_cb(const struct object_id *oid,
void *cb_data)
{
struct for_each_object_wrapper_data *data = cb_data;
- if (data->oi &&
- read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
+
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0)
return -1;
- return data->cb(oid, data->oi, data->cb_data);
+
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
+ }
}
int odb_source_loose_for_each_object(struct odb_source *source,
- struct object_info *oi,
+ const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
unsigned flags)
{
struct for_each_object_wrapper_data data = {
.source = source,
- .oi = oi,
+ .request = request,
.cb = cb,
.cb_data = cb_data,
};
diff --git a/object-file.h b/object-file.h
index af7f57d2a1..d9979baea8 100644
--- a/object-file.h
+++ b/object-file.h
@@ -128,12 +128,13 @@ int for_each_loose_file_in_source(struct odb_source *source,
/*
* Iterate through all loose objects in the given object database source and
- * invoke the callback function for each of them. If given, the object info
- * will be populated with the object's data as if you had called
- * `odb_source_loose_read_object_info()` on the object.
+ * invoke the callback function for each of them. If an object info request is
+ * given, then the object info will be read for every individual object and
+ * passed to the callback as if `odb_source_loose_read_object_info()` was
+ * called for the object.
*/
int odb_source_loose_for_each_object(struct odb_source *source,
- struct object_info *oi,
+ const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
unsigned flags);
diff --git a/odb.c b/odb.c
index 67decd3908..9d9a3fad62 100644
--- a/odb.c
+++ b/odb.c
@@ -998,7 +998,7 @@ int odb_freshen_object(struct object_database *odb,
}
int odb_for_each_object(struct object_database *odb,
- struct object_info *oi,
+ const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
unsigned flags)
@@ -1011,12 +1011,14 @@ int odb_for_each_object(struct object_database *odb,
continue;
if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
- ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
+ ret = odb_source_loose_for_each_object(source, request,
+ cb, cb_data, flags);
if (ret)
return ret;
}
- ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
+ ret = packfile_store_for_each_object(source->packfiles, request,
+ cb, cb_data, flags);
if (ret)
return ret;
}
diff --git a/odb.h b/odb.h
index 72d69ffcb3..8ad0fcc02f 100644
--- a/odb.h
+++ b/odb.h
@@ -492,6 +492,9 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
* Iterate through all objects contained in the object database. Note that
* objects may be iterated over multiple times in case they are either stored
* in different backends or in case they are stored in multiple sources.
+ * If an object info request is given, then the object info will be read and
+ * passed to the callback as if `odb_read_object_info()` was called for the
+ * object.
*
* Returning a non-zero error code from the callback function will cause
* iteration to abort. The error code will be propagated.
@@ -500,7 +503,7 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
* an arbitrary non-zero error code returned by the callback itself.
*/
int odb_for_each_object(struct object_database *odb,
- struct object_info *oi,
+ const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
unsigned flags);
diff --git a/packfile.c b/packfile.c
index e455150d65..57fbf51876 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2329,7 +2329,7 @@ int for_each_object_in_pack(struct packed_git *p,
struct packfile_store_for_each_object_wrapper_data {
struct packfile_store *store;
- struct object_info *oi;
+ const struct object_info *request;
odb_for_each_object_cb cb;
void *cb_data;
};
@@ -2341,28 +2341,31 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
{
struct packfile_store_for_each_object_wrapper_data *data = cb_data;
- if (data->oi) {
+ if (data->request) {
off_t offset = nth_packed_object_offset(pack, index_pos);
+ struct object_info oi = *data->request;
if (packed_object_info_with_index_pos(pack, offset,
- &index_pos, data->oi) < 0) {
+ &index_pos, &oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
- }
- return data->cb(oid, data->oi, data->cb_data);
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
+ }
}
int packfile_store_for_each_object(struct packfile_store *store,
- struct object_info *oi,
+ const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
unsigned flags)
{
struct packfile_store_for_each_object_wrapper_data data = {
.store = store,
- .oi = oi,
+ .request = request,
.cb = cb,
.cb_data = cb_data,
};
diff --git a/packfile.h b/packfile.h
index 8e0d2b7661..1a1b720764 100644
--- a/packfile.h
+++ b/packfile.h
@@ -343,14 +343,15 @@ int for_each_object_in_pack(struct packed_git *p,
/*
* Iterate through all packed objects in the given packfile store and invoke
- * the callback function for each of them. If given, the object info will be
- * populated with the object's data as if you had called
- * `packfile_store_read_object_info()` on the object.
+ * the callback function for each of them. If an object info request is given,
+ * then the object info will be read for every individual object and passed to
+ * the callback as if `packfile_store_read_object_info()` was called for the
+ * object.
*
* The flags parameter is a combination of `odb_for_each_object_flags`.
*/
int packfile_store_for_each_object(struct packfile_store *store,
- struct object_info *oi,
+ const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
unsigned flags);
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (10 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-23 1:21 ` Taylor Blau
2026-01-21 12:50 ` [PATCH v3 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
` (2 subsequent siblings)
14 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
When enumerating objects that are supposed to be stored in a new cruft
pack we use `for_each_packed_object()` and then derive each object's
mtime individually. Refactor this logic to instead use the new
`packfile_store_for_each_object()` function with an object info request
that asks for the respective mtimes.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/pack-objects.c | 45 +++++++++++++++++++++------------------------
1 file changed, 21 insertions(+), 24 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 74317051fd..223ec3b49e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit)
}
static int add_object_in_unpacked_pack(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
+ struct object_info *oi,
void *data UNUSED)
{
if (cruft) {
- off_t offset;
- time_t mtime;
-
- if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(pack, pos);
- } else {
- mtime = pack->mtime;
- }
- offset = nth_packed_object_offset(pack, pos);
-
- add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
- NULL, mtime);
+ add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
+ oi->u.packed.offset, NULL, *oi->mtimep);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
@@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(to_pack.repo,
- add_object_in_unpacked_pack,
- NULL,
- ODB_FOR_EACH_OBJECT_PACK_ORDER |
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
- ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
- die(_("cannot open pack index"));
+ struct odb_source *source;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ };
+
+ odb_prepare_alternates(to_pack.repo->objects);
+ for (source = to_pack.repo->objects->sources; source; source = source->next) {
+ if (!source->local)
+ continue;
+
+ if (packfile_store_for_each_object(source->packfiles, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
+ }
}
static int add_loose_object(const struct object_id *oid, const char *path,
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* Re: [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
@ 2026-01-23 1:21 ` Taylor Blau
2026-01-23 9:43 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 1:21 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:28PM +0100, Patrick Steinhardt wrote:
> static int add_object_in_unpacked_pack(const struct object_id *oid,
> - struct packed_git *pack,
> - uint32_t pos,
> + struct object_info *oi,
> void *data UNUSED)
> {
> if (cruft) {
> - off_t offset;
> - time_t mtime;
> -
> - if (pack->is_cruft) {
> - if (load_pack_mtimes(pack) < 0)
> - die(_("could not load cruft pack .mtimes"));
> - mtime = nth_packed_mtime(pack, pos);
> - } else {
> - mtime = pack->mtime;
> - }
> - offset = nth_packed_object_offset(pack, pos);
> -
> - add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
> - NULL, mtime);
OK, here's where we see the existing logic for determining the mtime of
an object in the GC sense. I see there's a subsequent patch that also
makes use of the object_info->mtimep field, and my guess is (not having
completely read that patch yet) that having the same notion of mtime
between the two callsites is desirable.
I still wonder whether imposing that notion of mtime at the object_info
layer is the right choice. I wonder if it would make more sense to allow
the caller to have a "statp" pointer filled out (or alternatively stick
a "struct stat" in both the packed union type as well as the loose one,
though the latter doesn't yet exist).
Then the caller could do something like:
static time_t object_info_gc_mtime(const struct object_info *oi)
{
if (!oi->statp)
BUG("oops!");
switch (oi->whence) {
case OI_CACHED:
return 0;
case OI_LOOSE:
return oi->statp->st_mtime;
case OI_PACKED:
struct packed_git *p = oi->u.packed.pack;
if (p->is_cruft) {
uint32_t pack_pos;
if (load_pack_mtimes(p) < 0)
die(_("could not load cruft pack .mtimes for '%s'"),
pack_basename(p));
if (offset_to_pack_pos(p, oi->u.packed.offset, &pack_pos) < 0)
die(_("could not find offset for object '%s' in cruft pack '%s'"),
oid_to_hex(&oi->oid),
pack_basename(p));
return nth_packed_mtime(p, pack_pos_to_index(p, pack_pos));
} else {
return p->mtime; /* or oi->statp->st_mtime */
}
default:
BUG("unknown oi->whence: %d", oi->whence);
}
}
I like the above because it encapsulates the GC-specific interpretation
of an object's mtime outside of the object_info layer, while adding
information (namely statp) that is generic enough to be potentially
useful to other callers who may not be interested in the GC-specific
interpretation.
> + add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
> + oi->u.packed.offset, NULL, *oi->mtimep);
> } else {
> add_object_entry(oid, OBJ_NONE, "", 0);
> }
> @@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
>
> static void add_objects_in_unpacked_packs(void)
> {
> - if (for_each_packed_object(to_pack.repo,
> - add_object_in_unpacked_pack,
> - NULL,
> - ODB_FOR_EACH_OBJECT_PACK_ORDER |
> - ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
> - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
> - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
> - die(_("cannot open pack index"));
> + struct odb_source *source;
> + time_t mtime;
> + struct object_info oi = {
> + .mtimep = &mtime,
> + };
> +
> + odb_prepare_alternates(to_pack.repo->objects);
> + for (source = to_pack.repo->objects->sources; source; source = source->next) {
> + if (!source->local)
> + continue;
OK, we dropped the ODB_FOR_EACH_OBJECT_LOCAL_ONLY flag when dispatching
to the packfile_store iterator, but that's OK, since it's handled above
here.
Interestingly, packfile_store_for_each_object_internal() has a similar
check:
if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
, but I'm wondering whether these are subtly different. Would a
non-local source ever have packs for which the p->pack_local bit is set?
Or is the locality of a pack determined relative to the source
containing it, in which case we'd need to make the check here?
> + if (packfile_store_for_each_object(source->packfiles, &oi,
> + add_object_in_unpacked_pack, NULL,
> + ODB_FOR_EACH_OBJECT_PACK_ORDER |
> + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
> + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
> + die(_("cannot open pack index"));
> + }
> }
>
> static int add_loose_object(const struct object_id *oid, const char *path,
>
> --
> 2.53.0.rc0.250.g0ac79233d6.dirty
>
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-23 1:21 ` Taylor Blau
@ 2026-01-23 9:43 ` Patrick Steinhardt
2026-01-23 18:35 ` Taylor Blau
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-23 9:43 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 22, 2026 at 08:21:55PM -0500, Taylor Blau wrote:
> On Wed, Jan 21, 2026 at 01:50:28PM +0100, Patrick Steinhardt wrote:
> > static int add_object_in_unpacked_pack(const struct object_id *oid,
> > - struct packed_git *pack,
> > - uint32_t pos,
> > + struct object_info *oi,
> > void *data UNUSED)
> > {
> > if (cruft) {
> > - off_t offset;
> > - time_t mtime;
> > -
> > - if (pack->is_cruft) {
> > - if (load_pack_mtimes(pack) < 0)
> > - die(_("could not load cruft pack .mtimes"));
> > - mtime = nth_packed_mtime(pack, pos);
> > - } else {
> > - mtime = pack->mtime;
> > - }
> > - offset = nth_packed_object_offset(pack, pos);
> > -
> > - add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
> > - NULL, mtime);
>
> OK, here's where we see the existing logic for determining the mtime of
> an object in the GC sense. I see there's a subsequent patch that also
> makes use of the object_info->mtimep field, and my guess is (not having
> completely read that patch yet) that having the same notion of mtime
> between the two callsites is desirable.
>
> I still wonder whether imposing that notion of mtime at the object_info
> layer is the right choice. I wonder if it would make more sense to allow
> the caller to have a "statp" pointer filled out (or alternatively stick
> a "struct stat" in both the packed union type as well as the loose one,
> though the latter doesn't yet exist).
The problem with filling out a `struct stat` though is that it will only
apply to backends that actually have a path to stat. There may be other
backends that don't. You could of course pretend that there was a file
and fill in the `st_mtime` field. But I don't really see the benefit
over having a standalone mtime field.
> Then the caller could do something like:
>
> static time_t object_info_gc_mtime(const struct object_info *oi)
> {
> if (!oi->statp)
> BUG("oops!");
>
> switch (oi->whence) {
> case OI_CACHED:
> return 0;
> case OI_LOOSE:
> return oi->statp->st_mtime;
> case OI_PACKED:
> struct packed_git *p = oi->u.packed.pack;
> if (p->is_cruft) {
> uint32_t pack_pos;
>
> if (load_pack_mtimes(p) < 0)
> die(_("could not load cruft pack .mtimes for '%s'"),
> pack_basename(p));
> if (offset_to_pack_pos(p, oi->u.packed.offset, &pack_pos) < 0)
> die(_("could not find offset for object '%s' in cruft pack '%s'"),
> oid_to_hex(&oi->oid),
> pack_basename(p));
>
> return nth_packed_mtime(p, pack_pos_to_index(p, pack_pos));
> } else {
> return p->mtime; /* or oi->statp->st_mtime */
> }
> default:
> BUG("unknown oi->whence: %d", oi->whence);
> }
> }
>
> I like the above because it encapsulates the GC-specific interpretation
> of an object's mtime outside of the object_info layer, while adding
> information (namely statp) that is generic enough to be potentially
> useful to other callers who may not be interested in the GC-specific
> interpretation.
This isn't achieving the goal of making the logic pluggable though, as
you now have backend-specific logic outside of the backends. Also, isn't
the end result basically the same as what I have proposed, except that
my version _is_ fully pluggable because the logic is entirely contained
in the backend?
> > @@ -4341,14 +4328,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
> >
> > static void add_objects_in_unpacked_packs(void)
> > {
> > - if (for_each_packed_object(to_pack.repo,
> > - add_object_in_unpacked_pack,
> > - NULL,
> > - ODB_FOR_EACH_OBJECT_PACK_ORDER |
> > - ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
> > - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
> > - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
> > - die(_("cannot open pack index"));
> > + struct odb_source *source;
> > + time_t mtime;
> > + struct object_info oi = {
> > + .mtimep = &mtime,
> > + };
> > +
> > + odb_prepare_alternates(to_pack.repo->objects);
> > + for (source = to_pack.repo->objects->sources; source; source = source->next) {
> > + if (!source->local)
> > + continue;
>
> OK, we dropped the ODB_FOR_EACH_OBJECT_LOCAL_ONLY flag when dispatching
> to the packfile_store iterator, but that's OK, since it's handled above
> here.
>
> Interestingly, packfile_store_for_each_object_internal() has a similar
> check:
>
> if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
> continue;
>
> , but I'm wondering whether these are subtly different. Would a
> non-local source ever have packs for which the p->pack_local bit is set?
> Or is the locality of a pack determined relative to the source
> containing it, in which case we'd need to make the check here?
To the best of my knowledge we may only ever end up adding a non-local
pack to the source, but not the other way round. This can for example
happen in git-index-pack(1).
But you know, there isn't any good reason to not continue passing this
flag. Better be safe than sorry.
Thanks!
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-23 9:43 ` Patrick Steinhardt
@ 2026-01-23 18:35 ` Taylor Blau
2026-01-26 8:53 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-23 18:35 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Fri, Jan 23, 2026 at 10:43:16AM +0100, Patrick Steinhardt wrote:
> On Thu, Jan 22, 2026 at 08:21:55PM -0500, Taylor Blau wrote:
> > On Wed, Jan 21, 2026 at 01:50:28PM +0100, Patrick Steinhardt wrote:
> > > static int add_object_in_unpacked_pack(const struct object_id *oid,
> > > - struct packed_git *pack,
> > > - uint32_t pos,
> > > + struct object_info *oi,
> > > void *data UNUSED)
> > > {
> > > if (cruft) {
> > > - off_t offset;
> > > - time_t mtime;
> > > -
> > > - if (pack->is_cruft) {
> > > - if (load_pack_mtimes(pack) < 0)
> > > - die(_("could not load cruft pack .mtimes"));
> > > - mtime = nth_packed_mtime(pack, pos);
> > > - } else {
> > > - mtime = pack->mtime;
> > > - }
> > > - offset = nth_packed_object_offset(pack, pos);
> > > -
> > > - add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
> > > - NULL, mtime);
> >
> > OK, here's where we see the existing logic for determining the mtime of
> > an object in the GC sense. I see there's a subsequent patch that also
> > makes use of the object_info->mtimep field, and my guess is (not having
> > completely read that patch yet) that having the same notion of mtime
> > between the two callsites is desirable.
> >
> > I still wonder whether imposing that notion of mtime at the object_info
> > layer is the right choice. I wonder if it would make more sense to allow
> > the caller to have a "statp" pointer filled out (or alternatively stick
> > a "struct stat" in both the packed union type as well as the loose one,
> > though the latter doesn't yet exist).
>
> The problem with filling out a `struct stat` though is that it will only
> apply to backends that actually have a path to stat. There may be other
> backends that don't. You could of course pretend that there was a file
> and fill in the `st_mtime` field. But I don't really see the benefit
> over having a standalone mtime field.
I understand what you're saying, but I don't think that this is unique
to stat. Looking through the object_info struct, there are a handful of
fields on the request side that are coupled to the objects themselves,
not their representation, such as typep, sizep, and contentp.
But there are a handful of fields in the request section which are *not*
properties of the objects themselves, but rather properties of the way
those objects are represented as part of the backend-specific
implementation.
For example, disk_sizep suggests that all objects are stored on disk and
have a clear notion of how much space they occupy. I could imagine a
backend implementation where perhaps the contents of objects are divvied
up into smaller chunks and deduplicated across many objects. I don't
think there is a clear answer to how much "disk size" an object occupies
in that case.
delta_base_oid is another field that I'd argue is not a property of the
object itself, but rather its representation. Of course, objects stored
in packfiles may or may not be stored as a delta against some other
object, and thus being able to ask what that object is makes sense. But
loose objects don't have the same property as a result of how they are
stored.
To me this seems like an example where implementation-specific details
are already leaking through the object_info struct. So in that sense I
don't think that adding a "struct stat" here is meaningfully changing
anything.
But I think the proposed mtimep field is a special case not only for the
reasons stated above, but because an object's mtime has multiple
interpretations already. For example, if I'm asking about an object's
mtime, and that object happens to be stored in a cruft pack, which mtime
am I referring to? Packed objects inherit their mtime from the mtime of
the *.pack itself, but cruft objects have an additional interpretation
which is read from the *.mtimes file corresponding to the cruft pack.
I don't love bolting another leaky abstraction onto the object_info
interface, but my broader concern is that the information here is not
just leaky but ambiguous. By adding a statp pointer, I think the
information is less ambiguous since the GC-specific interpretation of
mtime is done at a layer above stat(2).
> > Then the caller could do something like:
> >
> > static time_t object_info_gc_mtime(const struct object_info *oi)
> > {
> > if (!oi->statp)
> > BUG("oops!");
> >
> > switch (oi->whence) {
> > case OI_CACHED:
> > return 0;
> > case OI_LOOSE:
> > return oi->statp->st_mtime;
> > case OI_PACKED:
> > struct packed_git *p = oi->u.packed.pack;
> > if (p->is_cruft) {
> > uint32_t pack_pos;
> >
> > if (load_pack_mtimes(p) < 0)
> > die(_("could not load cruft pack .mtimes for '%s'"),
> > pack_basename(p));
> > if (offset_to_pack_pos(p, oi->u.packed.offset, &pack_pos) < 0)
> > die(_("could not find offset for object '%s' in cruft pack '%s'"),
> > oid_to_hex(&oi->oid),
> > pack_basename(p));
> >
> > return nth_packed_mtime(p, pack_pos_to_index(p, pack_pos));
> > } else {
> > return p->mtime; /* or oi->statp->st_mtime */
> > }
> > default:
> > BUG("unknown oi->whence: %d", oi->whence);
> > }
> > }
> >
> > I like the above because it encapsulates the GC-specific interpretation
> > of an object's mtime outside of the object_info layer, while adding
> > information (namely statp) that is generic enough to be potentially
> > useful to other callers who may not be interested in the GC-specific
> > interpretation.
>
> This isn't achieving the goal of making the logic pluggable though, as
> you now have backend-specific logic outside of the backends. Also, isn't
> the end result basically the same as what I have proposed, except that
> my version _is_ fully pluggable because the logic is entirely contained
> in the backend?
Yes, the end result is the same, both your patch and what I wrote here
implement the same GC-specific definition of an object's "mtime". I am
not following the argument about pluggability, though. The concern I
have above is that we are pushing domain-specific logic into the object
storage backend, not the other way around.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 120+ messages in thread
* Re: [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-23 18:35 ` Taylor Blau
@ 2026-01-26 8:53 ` Patrick Steinhardt
2026-01-29 11:08 ` Jeff King
0 siblings, 1 reply; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 8:53 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Fri, Jan 23, 2026 at 01:35:12PM -0500, Taylor Blau wrote:
> On Fri, Jan 23, 2026 at 10:43:16AM +0100, Patrick Steinhardt wrote:
> > On Thu, Jan 22, 2026 at 08:21:55PM -0500, Taylor Blau wrote:
> > > On Wed, Jan 21, 2026 at 01:50:28PM +0100, Patrick Steinhardt wrote:
> > > > static int add_object_in_unpacked_pack(const struct object_id *oid,
> > > > - struct packed_git *pack,
> > > > - uint32_t pos,
> > > > + struct object_info *oi,
> > > > void *data UNUSED)
> > > > {
> > > > if (cruft) {
> > > > - off_t offset;
> > > > - time_t mtime;
> > > > -
> > > > - if (pack->is_cruft) {
> > > > - if (load_pack_mtimes(pack) < 0)
> > > > - die(_("could not load cruft pack .mtimes"));
> > > > - mtime = nth_packed_mtime(pack, pos);
> > > > - } else {
> > > > - mtime = pack->mtime;
> > > > - }
> > > > - offset = nth_packed_object_offset(pack, pos);
> > > > -
> > > > - add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
> > > > - NULL, mtime);
> > >
> > > OK, here's where we see the existing logic for determining the mtime of
> > > an object in the GC sense. I see there's a subsequent patch that also
> > > makes use of the object_info->mtimep field, and my guess is (not having
> > > completely read that patch yet) that having the same notion of mtime
> > > between the two callsites is desirable.
> > >
> > > I still wonder whether imposing that notion of mtime at the object_info
> > > layer is the right choice. I wonder if it would make more sense to allow
> > > the caller to have a "statp" pointer filled out (or alternatively stick
> > > a "struct stat" in both the packed union type as well as the loose one,
> > > though the latter doesn't yet exist).
> >
> > The problem with filling out a `struct stat` though is that it will only
> > apply to backends that actually have a path to stat. There may be other
> > backends that don't. You could of course pretend that there was a file
> > and fill in the `st_mtime` field. But I don't really see the benefit
> > over having a standalone mtime field.
>
> I understand what you're saying, but I don't think that this is unique
> to stat. Looking through the object_info struct, there are a handful of
> fields on the request side that are coupled to the objects themselves,
> not their representation, such as typep, sizep, and contentp.
>
> But there are a handful of fields in the request section which are *not*
> properties of the objects themselves, but rather properties of the way
> those objects are represented as part of the backend-specific
> implementation.
Yes, the specific interpretation will change for some fields. But in
general, most of the fields still apply to all backends.
> For example, disk_sizep suggests that all objects are stored on disk and
> have a clear notion of how much space they occupy. I could imagine a
> backend implementation where perhaps the contents of objects are divvied
> up into smaller chunks and deduplicated across many objects. I don't
> think there is a clear answer to how much "disk size" an object occupies
> in that case.
This would still apply to other backends though. It's true that "disk"
size is a bit of a misnomer now, and that it should probably rather be
renamed to "storage" size. But overall, no matter the backend, you will
still eventually end up storing the object data somewhere, and that
takes up space.
> delta_base_oid is another field that I'd argue is not a property of the
> object itself, but rather its representation. Of course, objects stored
> in packfiles may or may not be stored as a delta against some other
> object, and thus being able to ask what that object is makes sense. But
> loose objects don't have the same property as a result of how they are
> stored.
Yup. This field is specific to the packed backend indeed and ideally
shouldn't be part of the `struct object_info`. In the best case it could
be lifted into `struct object_info::u`, but I'm not sure whether that's
easily possible.
> To me this seems like an example where implementation-specific details
> are already leaking through the object_info struct. So in that sense I
> don't think that adding a "struct stat" here is meaningfully changing
> anything.
The thing is that I'm trying to clean up all the different messes that
we have. So instead of adding _more_ leakiness, I'd rather prefer to
remove some of it.
> But I think the proposed mtimep field is a special case not only for the
> reasons stated above, but because an object's mtime has multiple
> interpretations already. For example, if I'm asking about an object's
> mtime, and that object happens to be stored in a cruft pack, which mtime
> am I referring to? Packed objects inherit their mtime from the mtime of
> the *.pack itself, but cruft objects have an additional interpretation
> which is read from the *.mtimes file corresponding to the cruft pack.
This is a good question indeed though.
> I don't love bolting another leaky abstraction onto the object_info
> interface, but my broader concern is that the information here is not
> just leaky but ambiguous. By adding a statp pointer, I think the
> information is less ambiguous since the GC-specific interpretation of
> mtime is done at a layer above stat(2).
Fair point. For the current backend, mtime can be ambiguous as the same
object may be stored multiple times: either as a loose object, or as
part of any of the packfiles.
In the context of `odb_for_each_object()` that info is not ambiguous
though: we would yield the same object multiple times, and every time we
yield it we may have a different mtime. And this is working as
expected for the two callsites:
- In "reachable.c" we use the mtimep field in the context of recent
objects. So if at least one of the objects has a new-enough mtime we
would eventually see it.
- In "builtin/pack-objects.c" we use basically the same logic as we
use after my patch series, where we use either the cruft time or the
pack time. So things work as expected over there, too.
So I'd claim that this is working sensibly for `odb_for_each_object()`,
and there is no ambiguity involved. It's the caller that has to
disambiguate, and that's already happening.
But things are a bit different if you invoke `odb_read_object_info()`
directly, as we have no way to disambiguate there. We only want to yield
_a_ representation of an object, so the mtime will be derived from
whatever data structure the object was found in first. This could be
helped with better documentation.
> > > Then the caller could do something like:
> > >
> > > static time_t object_info_gc_mtime(const struct object_info *oi)
> > > {
> > > if (!oi->statp)
> > > BUG("oops!");
> > >
> > > switch (oi->whence) {
> > > case OI_CACHED:
> > > return 0;
> > > case OI_LOOSE:
> > > return oi->statp->st_mtime;
> > > case OI_PACKED:
> > > struct packed_git *p = oi->u.packed.pack;
> > > if (p->is_cruft) {
> > > uint32_t pack_pos;
> > >
> > > if (load_pack_mtimes(p) < 0)
> > > die(_("could not load cruft pack .mtimes for '%s'"),
> > > pack_basename(p));
> > > if (offset_to_pack_pos(p, oi->u.packed.offset, &pack_pos) < 0)
> > > die(_("could not find offset for object '%s' in cruft pack '%s'"),
> > > oid_to_hex(&oi->oid),
> > > pack_basename(p));
> > >
> > > return nth_packed_mtime(p, pack_pos_to_index(p, pack_pos));
> > > } else {
> > > return p->mtime; /* or oi->statp->st_mtime */
> > > }
> > > default:
> > > BUG("unknown oi->whence: %d", oi->whence);
> > > }
> > > }
> > >
> > > I like the above because it encapsulates the GC-specific interpretation
> > > of an object's mtime outside of the object_info layer, while adding
> > > information (namely statp) that is generic enough to be potentially
> > > useful to other callers who may not be interested in the GC-specific
> > > interpretation.
> >
> > This isn't achieving the goal of making the logic pluggable though, as
> > you now have backend-specific logic outside of the backends. Also, isn't
> > the end result basically the same as what I have proposed, except that
> > my version _is_ fully pluggable because the logic is entirely contained
> > in the backend?
>
> Yes, the end result is the same, both your patch and what I wrote here
> implement the same GC-specific definition of an object's "mtime". I am
> not following the argument about pluggability, though. The concern I
> have above is that we are pushing domain-specific logic into the object
> storage backend, not the other way around.
To expand on the pluggability bit: every time you add a new backend
you'll have to extend the above logic to understand how it represents
the mtime. That by itself might be doable, but let's for example
consider a backend that is a black box to us (like a shared library that
may plug in arbitrary storage logic). In that case you would not even be
able to derive the information unless you have a generic layer that lets
you convey it to the caller.
So overall I agree with you that there are nuances here, and that the
mtimep pointer _can_ be used incorrectly. But I still think that the
concept is generic enough across backends, and the refactored logic
still works as expected. I'll try to expand the docs and commit message
a bit to cover this discussion.
Thanks!
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread* Re: [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-26 8:53 ` Patrick Steinhardt
@ 2026-01-29 11:08 ` Jeff King
2026-01-30 12:57 ` Patrick Steinhardt
0 siblings, 1 reply; 120+ messages in thread
From: Jeff King @ 2026-01-29 11:08 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: Taylor Blau, git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Mon, Jan 26, 2026 at 09:53:18AM +0100, Patrick Steinhardt wrote:
> > Yes, the end result is the same, both your patch and what I wrote here
> > implement the same GC-specific definition of an object's "mtime". I am
> > not following the argument about pluggability, though. The concern I
> > have above is that we are pushing domain-specific logic into the object
> > storage backend, not the other way around.
>
> To expand on the pluggability bit: every time you add a new backend
> you'll have to extend the above logic to understand how it represents
> the mtime. That by itself might be doable, but let's for example
> consider a backend that is a black box to us (like a shared library that
> may plug in arbitrary storage logic). In that case you would not even be
> able to derive the information unless you have a generic layer that lets
> you convey it to the caller.
>
> So overall I agree with you that there are nuances here, and that the
> mtimep pointer _can_ be used incorrectly. But I still think that the
> concept is generic enough across backends, and the refactored logic
> still works as expected. I'll try to expand the docs and commit message
> a bit to cover this discussion.
There's a related concept that I saw while reading some of the earlier
patches. When you converted fsck, I wondered how you would handle the
call to read_loose_object(), which takes an actual path. And it needs to
do so, because we want to make sure we are opening and reading that
particular copy of the object, and not one from elsewhere.
The answer is that you punted on it for this series, and we still get
the path via for_each_loose_file_in_source(). ;) That is OK, but I think
it will eventually run into the same issue: we will need some kind of
cursor or context for the iterator to be able to get extended
information about a particular copy of an object.
I think there are probably two approaches here:
1. The abstract odb API tries to share as little as possible. It gives
the caller back an opaque context struct, and that struct can be
handed back to the odb to get object contents or other information
(perhaps even an mtime!). Under the hood for the current odb
implementation this is probably just a pointer to a string with the
filesystem path for loose objects, and the usual packed_git/offset
pair for packed objects.
2. The odb API provides a set of information that a particular backend
_might_ implement, and callers can poke at that information and
decide how to handle it when it's not available. And so that might
include a filesystem path for loose objects, which some backends
may choose to leave NULL.
Option (1) presents a cleaner API for the odb, but it's also more
restrictive. Anything that a caller _might_ want to do has to be pushed
down into the API, and it has to start learning about things like
mtimes. And how to decide what "mtime" means for non-filesystem
backends.
Option (2) pushes more work onto the callers. They need to not only look
up the mtimes themselves (like they do now), but they have to decide how
to handle the case when no path is available. Which in the worst case
means a special case for each type of backend, though I think in
practice they'd probably fall into rough groups.
I think one thing that appeals to me about option 2, though, is that it
keeps a lot of the specialized "business logic" together in those
callers. Most code doesn't care about concepts like mtime or specific
copies of objects. But when it does, like in repack or fsck, there are
often subtle assumptions and interpretations. I'd rather see all of that
lumped together in the fsck code than have it split half-and-half
between them and the odb code (which is really going to be some backend's
idea of how its concepts can be shoe-horned into the abstract API).
-Peff
^ permalink raw reply [flat|nested] 120+ messages in thread* Re: [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-29 11:08 ` Jeff King
@ 2026-01-30 12:57 ` Patrick Steinhardt
0 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-30 12:57 UTC (permalink / raw)
To: Jeff King; +Cc: Taylor Blau, git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Thu, Jan 29, 2026 at 06:08:39AM -0500, Jeff King wrote:
> On Mon, Jan 26, 2026 at 09:53:18AM +0100, Patrick Steinhardt wrote:
>
> > > Yes, the end result is the same, both your patch and what I wrote here
> > > implement the same GC-specific definition of an object's "mtime". I am
> > > not following the argument about pluggability, though. The concern I
> > > have above is that we are pushing domain-specific logic into the object
> > > storage backend, not the other way around.
> >
> > To expand on the pluggability bit: every time you add a new backend
> > you'll have to extend the above logic to understand how it represents
> > the mtime. That by itself might be doable, but let's for example
> > consider a backend that is a black box to us (like a shared library that
> > may plug in arbitrary storage logic). In that case you would not even be
> > able to derive the information unless you have a generic layer that lets
> > you convey it to the caller.
> >
> > So overall I agree with you that there are nuances here, and that the
> > mtimep pointer _can_ be used incorrectly. But I still think that the
> > concept is generic enough across backends, and the refactored logic
> > still works as expected. I'll try to expand the docs and commit message
> > a bit to cover this discussion.
>
> There's a related concept that I saw while reading some of the earlier
> patches. When you converted fsck, I wondered how you would handle the
> call to read_loose_object(), which takes an actual path. And it needs to
> do so, because we want to make sure we are opening and reading that
> particular copy of the object, and not one from elsewhere.
>
> The answer is that you punted on it for this series, and we still get
> the path via for_each_loose_file_in_source(). ;) That is OK, but I think
> it will eventually run into the same issue: we will need some kind of
> cursor or context for the iterator to be able to get extended
> information about a particular copy of an object.
>
> I think there are probably two approaches here:
>
> 1. The abstract odb API tries to share as little as possible. It gives
> the caller back an opaque context struct, and that struct can be
> handed back to the odb to get object contents or other information
> (perhaps even an mtime!). Under the hood for the current odb
> implementation this is probably just a pointer to a string with the
> filesystem path for loose objects, and the usual packed_git/offset
> pair for packed objects.
>
> 2. The odb API provides a set of information that a particular backend
> _might_ implement, and callers can poke at that information and
> decide how to handle it when it's not available. And so that might
> include a filesystem path for loose objects, which some backends
> may choose to leave NULL.
>
> Option (1) presents a cleaner API for the odb, but it's also more
> restrictive. Anything that a caller _might_ want to do has to be pushed
> down into the API, and it has to start learning about things like
> mtimes. And how to decide what "mtime" means for non-filesystem
> backends.
>
> Option (2) pushes more work onto the callers. They need to not only look
> up the mtimes themselves (like they do now), but they have to decide how
> to handle the case when no path is available. Which in the worst case
> means a special case for each type of backend, though I think in
> practice they'd probably fall into rough groups.
>
> I think one thing that appeals to me about option 2, though, is that it
> keeps a lot of the specialized "business logic" together in those
> callers. Most code doesn't care about concepts like mtime or specific
> copies of objects. But when it does, like in repack or fsck, there are
> often subtle assumptions and interpretations. I'd rather see all of that
> lumped together in the fsck code than have it split half-and-half
> between them and the odb code (which is really going to be some backend's
> idea of how its concepts can be shoe-horned into the abstract API).
Yup. One thing that I'm planning to do in one of the subsequent patch
series is to expand `struct object_info` to handle this.
Right now, the structure contains a `whence` field that tells us which
backend the information is stored in. But that concept can be extended
to surface more info: instead of only telling the caller the type, we
can instead return the actual source that the object has been looked up
in.
Furthermore, the `struct object_info::u` union already contains enough
information for us to uniquely identify a specific object. So what we
would do then is to call `odb_source_read_object_info()` on the specific
source and pass it the union.
The loose source wouldn't have to do anything in that case, as the
location of the object is deterministic and there can only be one copy.
But the packed source would inspect `u.packed.pack` and thus know which
specific object we refer to.
This still hinges on a couple intermediate steps, but I think with this
plan we should be able to handle this issue in a way where the caller
doesn't need to know _anything_ about how exactly the ODB source itself
works.
Thanks!
Patrick
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v3 13/14] reachable: convert to use `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (11 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-21 12:50 ` [PATCH v3 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
2026-01-22 1:33 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Taylor Blau
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
To figure out which objects have expired we enumerate all loose and
packed objects individually so that we can figure out their respective
mtimes. Refactor the code to instead use `odb_for_each_object()` with a
request that asks for the object mtime instead.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reachable.c | 125 +++++++++++++++++-------------------------------------------
1 file changed, 35 insertions(+), 90 deletions(-)
diff --git a/reachable.c b/reachable.c
index 82676b2668..101cfc2727 100644
--- a/reachable.c
+++ b/reachable.c
@@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
return oidset_contains(&data->extra_recent_oids, oid);
}
-static void add_recent_object(const struct object_id *oid,
- struct packed_git *pack,
- off_t offset,
- timestamp_t mtime,
- struct recent_data *data)
+static int want_recent_object(struct recent_data *data,
+ const struct object_id *oid)
{
- struct object *obj;
- enum object_type type;
+ if (data->ignore_in_core_kept_packs &&
+ has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ return 0;
+ return 1;
+}
- if (!obj_is_recent(oid, mtime, data))
- return;
+static int add_recent_object(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data)
+{
+ struct recent_data *data = cb_data;
+ struct object *obj;
- /*
- * We do not want to call parse_object here, because
- * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for
- * later processing, and the revision machinery expects
- * commits and tags to have been parsed.
- */
- type = odb_read_object_info(the_repository->objects, oid, NULL);
- if (type < 0)
- die("unable to get object info for %s", oid_to_hex(oid));
+ if (!want_recent_object(data, oid) ||
+ !obj_is_recent(oid, *oi->mtimep, data))
+ return 0;
- switch (type) {
+ switch (*oi->typep) {
case OBJ_TAG:
case OBJ_COMMIT:
obj = parse_object_or_die(the_repository, oid, NULL);
@@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid,
break;
default:
die("unknown object type for %s: %s",
- oid_to_hex(oid), type_name(type));
+ oid_to_hex(oid), type_name(*oi->typep));
}
if (!obj)
die("unable to lookup %s", oid_to_hex(oid));
-
- add_pending_object(data->revs, obj, "");
- if (data->cb)
- data->cb(obj, pack, offset, mtime);
-}
-
-static int want_recent_object(struct recent_data *data,
- const struct object_id *oid)
-{
- if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ if (obj->flags & SEEN)
return 0;
- return 1;
-}
-static int add_recent_loose(const struct object_id *oid,
- const char *path, void *data)
-{
- struct stat st;
- struct object *obj;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
-
- if (stat(path, &st) < 0) {
- /*
- * It's OK if an object went away during our iteration; this
- * could be due to a simultaneous repack. But anything else
- * we should abort, since we might then fail to mark objects
- * which should not be pruned.
- */
- if (errno == ENOENT)
- return 0;
- return error_errno("unable to stat %s", oid_to_hex(oid));
+ add_pending_object(data->revs, obj, "");
+ if (data->cb) {
+ if (oi->whence == OI_PACKED)
+ data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep);
+ else
+ data->cb(obj, NULL, 0, *oi->mtimep);
}
- add_recent_object(oid, NULL, 0, st.st_mtime, data);
- return 0;
-}
-
-static int add_recent_packed(const struct object_id *oid,
- struct packed_git *p,
- uint32_t pos,
- void *data)
-{
- struct object *obj;
- timestamp_t mtime = p->mtime;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
- if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(p, pos);
- }
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
return 0;
}
@@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum odb_for_each_object_flags flags;
+ unsigned flags;
+ enum object_type type;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ .typep = &type,
+ };
int r;
data.revs = revs;
@@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
- r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
- if (r)
- goto done;
-
flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
+ r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags);
+ if (r)
+ goto done;
done:
oidset_clear(&data.extra_recent_oids);
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v3 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (12 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-21 12:50 ` Patrick Steinhardt
2026-01-22 1:33 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Taylor Blau
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-21 12:50 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have converted all callers of `for_each_loose_object()` and
`for_each_packed_object()` to use their new replacement functions
instead. We can thus remove them now.
Do so and inline `packfile_store_for_each_object_internal()` now that it
only has a single callsite again. This makes it a bit easier to follow
the callback indirection that is happening there.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 20 ------------
object-file.h | 11 -------
packfile.c | 99 +++++++++++++++++++++--------------------------------------
packfile.h | 2 --
4 files changed, 35 insertions(+), 97 deletions(-)
diff --git a/object-file.c b/object-file.c
index c0f896673b..bc5209f2fe 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
-{
- struct odb_source *source;
-
- odb_prepare_alternates(odb);
- for (source = odb->sources; source; source = source->next) {
- int r = for_each_loose_file_in_source(source, cb, NULL,
- NULL, data);
- if (r)
- return r;
-
- if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
- break;
- }
-
- return 0;
-}
-
struct for_each_object_wrapper_data {
struct odb_source *source;
struct object_info *oi;
diff --git a/object-file.h b/object-file.h
index 048b778531..af7f57d2a1 100644
--- a/object-file.h
+++ b/object-file.h
@@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
each_loose_subdir_fn subdir_cb,
void *data);
-/*
- * Iterate over all accessible loose objects without respect to
- * reachability. By default, this includes both local and alternate objects.
- * The order in which objects are visited is unspecified.
- *
- * Any flags specific to packs are ignored.
- */
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn, void *,
- enum odb_for_each_object_flags flags);
-
/*
* Iterate through all loose objects in the given object database source and
* invoke the callback function for each of them. If given, the object info
diff --git a/packfile.c b/packfile.c
index c96ec21f86..6f56b5e2dc 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2326,65 +2326,6 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-static int packfile_store_for_each_object_internal(struct packfile_store *store,
- each_packed_object_fn cb,
- void *data,
- unsigned flags,
- int *pack_errors)
-{
- struct packfile_list_entry *e;
- int ret = 0;
-
- store->skip_mru_updates = true;
-
- for (e = packfile_store_get_packs(store); e; e = e->next) {
- struct packed_git *p = e->pack;
-
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- *pack_errors = 1;
- continue;
- }
-
- ret = for_each_object_in_pack(p, cb, data, flags);
- if (ret)
- break;
- }
-
- store->skip_mru_updates = false;
-
- return ret;
-}
-
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
-{
- struct odb_source *source;
- int pack_errors = 0;
- int ret = 0;
-
- odb_prepare_alternates(repo->objects);
-
- for (source = repo->objects->sources; source; source = source->next) {
- ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
- flags, &pack_errors);
- if (ret)
- break;
- }
-
- return ret ? ret : pack_errors;
-}
-
struct packfile_store_for_each_object_wrapper_data {
struct packfile_store *store;
struct object_info *oi;
@@ -2424,14 +2365,44 @@ int packfile_store_for_each_object(struct packfile_store *store,
.cb = cb,
.cb_data = cb_data,
};
+ struct packfile_list_entry *e;
int pack_errors = 0, ret;
- ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
- &data, flags, &pack_errors);
- if (ret)
- return ret;
+ store->skip_mru_updates = true;
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
+
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ pack_errors = 1;
+ continue;
+ }
+
+ ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
+ &data, flags);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
- return pack_errors ? -1 : 0;
+out:
+ store->skip_mru_updates = false;
+
+ if (!ret && pack_errors)
+ ret = -1;
+ return ret;
}
struct add_promisor_object_data {
diff --git a/packfile.h b/packfile.h
index ab0637fbe9..8e0d2b7661 100644
--- a/packfile.h
+++ b/packfile.h
@@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
unsigned flags);
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags);
/*
* Iterate through all packed objects in the given packfile store and invoke
--
2.53.0.rc0.250.g0ac79233d6.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* Re: [PATCH v3 00/14] odb: introduce `odb_for_each_object()`
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (13 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
@ 2026-01-22 1:33 ` Taylor Blau
2026-01-22 17:02 ` Junio C Hamano
14 siblings, 1 reply; 120+ messages in thread
From: Taylor Blau @ 2026-01-22 1:33 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler, Junio C Hamano
On Wed, Jan 21, 2026 at 01:50:16PM +0100, Patrick Steinhardt wrote:
> The patch series is built on top of 8745eae506 (The 17th batch,
> 2026-01-11) with the following two series merged into it:
>
> - ps/read-object-info-improvements at a282a8f163 (packfile: move MIDX
> into packfile store, 2026-01-09).
>
> - ps/packfile-store-in-odb-source at 12d3b58b55 (packfile: drop
> repository parameter from `packed_object_info()`, 2026-01-12) .
I was having a little bit of trouble constructing a base to apply these
patches. a282a8f163 merges cleanly into 8745eae506, but 12d3b58b55 does
not merge cleanly into that, nor do they apply as a single octopus
merge.
Looking at the base-commit identified below from your fork[1], there is
some conflict resolution required to merge in the latter series. I'm
including the --remerge-diff results below in case others are interested
in applying this locally.
--- 8< ---
diff --git a/packfile.c b/packfile.c
remerge CONFLICT (content): Merge conflict in packfile.c
index 4cc9d8c07e6..402c3b5dc73 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2164,16 +2164,8 @@ int packfile_store_read_object_info(struct packfile_store *store,
if (!oi)
return 0;
-<<<<<<< b7f649ca936 (Merge remote-tracking branch 'junio/ps/read-object-info-improvements' into HEAD)
ret = packed_object_info(e.p, e.offset, oi);
if (ret < 0) {
-||||||| merged common ancestors
- rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi);
- if (rtype < 0) {
-=======
- rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi);
- if (rtype < 0) {
->>>>>>> a282a8f163f (packfile: move MIDX into packfile store)
mark_bad_packed_object(e.p, oid);
return -1;
}
@@ -2574,17 +2566,9 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
oi.sizep = &size;
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
-<<<<<<< b7f649ca936 (Merge remote-tracking branch 'junio/ps/read-object-info-improvements' into HEAD)
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
- repo_settings_get_big_file_threshold(store->odb->repo) >= size)
-||||||| merged common ancestors
- oi.u.packed.is_delta ||
- repo_settings_get_big_file_threshold(store->odb->repo) >= size)
-=======
- oi.u.packed.is_delta ||
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
->>>>>>> a282a8f163f (packfile: move MIDX into packfile store)
return -1;
in_pack_type = unpack_object_header(oi.u.packed.pack,
--- >8 ---
Thanks,
Taylor
[1]: https://gitlab.com/pks-gitlab/git.git/
^ permalink raw reply related [flat|nested] 120+ messages in thread* Re: [PATCH v3 00/14] odb: introduce `odb_for_each_object()`
2026-01-22 1:33 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Taylor Blau
@ 2026-01-22 17:02 ` Junio C Hamano
0 siblings, 0 replies; 120+ messages in thread
From: Junio C Hamano @ 2026-01-22 17:02 UTC (permalink / raw)
To: Taylor Blau; +Cc: Patrick Steinhardt, git, Karthik Nayak, Justin Tobler
Taylor Blau <me@ttaylorr.com> writes:
> On Wed, Jan 21, 2026 at 01:50:16PM +0100, Patrick Steinhardt wrote:
>> The patch series is built on top of 8745eae506 (The 17th batch,
>> 2026-01-11) with the following two series merged into it:
>>
>> - ps/read-object-info-improvements at a282a8f163 (packfile: move MIDX
>> into packfile store, 2026-01-09).
>>
>> - ps/packfile-store-in-odb-source at 12d3b58b55 (packfile: drop
>> repository parameter from `packed_object_info()`, 2026-01-12) .
>
> I was having a little bit of trouble constructing a base to apply these
> patches. a282a8f163 merges cleanly into 8745eae506, but 12d3b58b55 does
> not merge cleanly into that, nor do they apply as a single octopus
> merge.
>
> Looking at the base-commit identified below from your fork[1], there is
> some conflict resolution required to merge in the latter series. I'm
> including the --remerge-diff results below in case others are interested
> in applying this locally.
Thanks for independently validating the conflict resolution I did.
A quick glance of your remerge-diff matches what I had been using
for the past week:
$ git log --oneline --first-parent master..ps/odb-for-each-object
...
ec16dde5c8 Merge branch 'ps/packfile-store-in-odb-source' into ps/odb-for-each-object
c8e1706e8d Merge branch 'ps/read-object-info-improvements' into ps/odb-for-each-object
$ git log -2 --oneline --remerge-diff -p ec16dde5c8
ec16dde5c8 Merge branch 'ps/packfile-store-in-odb-source' into ps/odb-for-each-object
diff --git a/packfile.c b/packfile.c
remerge CONFLICT (content): Merge conflict in packfile.c
index d951de73d1..402c3b5dc7 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2164,16 +2164,8 @@ int packfile_store_read_object_info(struct packfile_store *store,
if (!oi)
return 0;
-<<<<<<< c8e1706e8d (Merge branch 'ps/read-object-info-improvements' into ps/odb-for-each-object)
ret = packed_object_info(e.p, e.offset, oi);
if (ret < 0) {
-||||||| merged common ancestors
- rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi);
- if (rtype < 0) {
-=======
- rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi);
- if (rtype < 0) {
->>>>>>> a282a8f163 (packfile: move MIDX into packfile store)
mark_bad_packed_object(e.p, oid);
return -1;
}
@@ -2574,17 +2566,9 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
oi.sizep = &size;
if (packfile_store_read_object_info(store, oid, &oi, 0) ||
-<<<<<<< c8e1706e8d (Merge branch 'ps/read-object-info-improvements' into ps/odb-for-each-object)
oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
- repo_settings_get_big_file_threshold(store->odb->repo) >= size)
-||||||| merged common ancestors
- oi.u.packed.is_delta ||
- repo_settings_get_big_file_threshold(store->odb->repo) >= size)
-=======
- oi.u.packed.is_delta ||
repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
->>>>>>> a282a8f163 (packfile: move MIDX into packfile store)
return -1;
in_pack_type = unpack_object_header(oi.u.packed.pack,
c8e1706e8d Merge branch 'ps/read-object-info-improvements' into ps/odb-for-each-object
>
> --- 8< ---
> diff --git a/packfile.c b/packfile.c
> remerge CONFLICT (content): Merge conflict in packfile.c
> index 4cc9d8c07e6..402c3b5dc73 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -2164,16 +2164,8 @@ int packfile_store_read_object_info(struct packfile_store *store,
> if (!oi)
> return 0;
>
> -<<<<<<< b7f649ca936 (Merge remote-tracking branch 'junio/ps/read-object-info-improvements' into HEAD)
> ret = packed_object_info(e.p, e.offset, oi);
> if (ret < 0) {
> -||||||| merged common ancestors
> - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi);
> - if (rtype < 0) {
> -=======
> - rtype = packed_object_info(store->source->odb->repo, e.p, e.offset, oi);
> - if (rtype < 0) {
> ->>>>>>> a282a8f163f (packfile: move MIDX into packfile store)
> mark_bad_packed_object(e.p, oid);
> return -1;
> }
> @@ -2574,17 +2566,9 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
> oi.sizep = &size;
>
> if (packfile_store_read_object_info(store, oid, &oi, 0) ||
> -<<<<<<< b7f649ca936 (Merge remote-tracking branch 'junio/ps/read-object-info-improvements' into HEAD)
> oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
> oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
> - repo_settings_get_big_file_threshold(store->odb->repo) >= size)
> -||||||| merged common ancestors
> - oi.u.packed.is_delta ||
> - repo_settings_get_big_file_threshold(store->odb->repo) >= size)
> -=======
> - oi.u.packed.is_delta ||
> repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
> ->>>>>>> a282a8f163f (packfile: move MIDX into packfile store)
> return -1;
>
> in_pack_type = unpack_object_header(oi.u.packed.pack,
> --- >8 ---
>
> Thanks,
> Taylor
>
> [1]: https://gitlab.com/pks-gitlab/git.git/
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 00/14] odb: introduce `odb_for_each_object()`
2026-01-15 11:04 [PATCH 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (16 preceding siblings ...)
2026-01-21 12:50 ` [PATCH v3 00/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
` (14 more replies)
17 siblings, 15 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Hi,
this patch series introduces a generic `odb_for_each_object()` function
to iterate through objects and adapts callers to use it. The intent is
to make iteration through objects independent of the actual storage
backend.
The series is structured as follows:
- Commits 1 to 2 do some cleanups for the for-each-object flags.
- Commits 3 to 7 introduce the infrastructure for
`odb_for_each_object()`.
- Commits 8 to 13 convert a couple of callers to use the new
interfaces.
- Commit 14 drops now-unused functions.
The patch series is built on top of 8745eae506 (The 17th batch,
2026-01-11) with the following two series merged into it:
- ps/read-object-info-improvements at a282a8f163 (packfile: move MIDX
into packfile store, 2026-01-09).
- ps/packfile-store-in-odb-source at 12d3b58b55 (packfile: drop
repository parameter from `packed_object_info()`, 2026-01-12).
Changes in v4:
- Convert the `odb_for_each_object()` object info into a read-only
request parameter. Instead, we now read into a "fresh" object info
in the backends so that there can be no stale data.
- Fix typo in `struct object_info *io` parameter.
- Document what's still missing to convert `batch_each_object()` to
use the generic `odb_for_each_object()` function.
- Document ambiguity of the `mtime`.
- Re-add the `ODB_FOR_EACH_OBJECT_LOCAL_ONLY` flag in
`add_object_in_unpacked_pack()`. It shouldn't make any difference,
but it makes the conversion a bit more straightforward.
- Link to v3: https://lore.kernel.org/r/20260121-pks-odb-for-each-object-v3-0-12c4dfd24227@pks.im
Changes in v3:
- Fix error code propagation in last commit.
- Link to v2: https://lore.kernel.org/r/20260120-pks-odb-for-each-object-v2-0-d05cbfd3d6f8@pks.im
Changes in v2:
- Clarify the comment of `odb_for_each_object()` to point out that
it's the callback that can abort iteration by returning a non-zero
error code.
- Document in the commit message that we don't yet convert all sites
to use `odb_for_each_object()`.
- Link to v1: https://lore.kernel.org/r/20260115-pks-odb-for-each-object-v1-0-5418a91d5d99@pks.im
Thanks!
Patrick
---
Patrick Steinhardt (14):
odb: rename `FOR_EACH_OBJECT_*` flags
odb: fix flags parameter to be unsigned
object-file: extract function to read object info from path
object-file: introduce function to iterate through objects
packfile: extract function to iterate through objects of a store
packfile: introduce function to iterate through objects
odb: introduce `odb_for_each_object()`
builtin/fsck: refactor to use `odb_for_each_object()`
treewide: enumerate promisor objects via `odb_for_each_object()`
treewide: drop uses of `for_each_{loose,packed}_object()`
odb: introduce mtime fields for object info requests
builtin/pack-objects: use `packfile_store_for_each_object()`
reachable: convert to use `odb_for_each_object()`
odb: drop unused `for_each_{loose,packed}_object()` functions
builtin/cat-file.c | 36 ++++++++--
builtin/fsck.c | 57 ++++-----------
builtin/pack-objects.c | 48 +++++++------
commit-graph.c | 46 +++++++++----
object-file.c | 125 ++++++++++++++++++++++-----------
object-file.h | 22 +++---
odb.c | 31 +++++++++
odb.h | 58 ++++++++++++++--
packfile.c | 184 +++++++++++++++++++++++++++++++++----------------
packfile.h | 19 ++++-
reachable.c | 129 ++++++++++------------------------
repack-promisor.c | 8 +--
revision.c | 10 ++-
13 files changed, 462 insertions(+), 311 deletions(-)
Range-diff versus v3:
1: a080e62c44 = 1: e7fa63f733 odb: rename `FOR_EACH_OBJECT_*` flags
2: 7980f241a9 = 2: b462808c07 odb: fix flags parameter to be unsigned
3: 14b9251711 = 3: 00d77e9e45 object-file: extract function to read object info from path
4: 93af71f3c7 ! 4: b9899bd1cb object-file: introduce function to iterate through objects
@@ object-file.c: int for_each_loose_object(struct object_database *odb,
+struct for_each_object_wrapper_data {
+ struct odb_source *source;
-+ struct object_info *oi;
++ const struct object_info *request;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
@@ object-file.c: int for_each_loose_object(struct object_database *odb,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
-+ if (data->oi &&
-+ read_object_info_from_path(data->source, path, oid, data->oi, 0) < 0)
++
++ if (data->request) {
++ struct object_info oi = *data->request;
++
++ if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0)
+ return -1;
-+ return data->cb(oid, data->oi, data->cb_data);
++
++ return data->cb(oid, &oi, data->cb_data);
++ } else {
++ return data->cb(oid, NULL, data->cb_data);
++ }
+}
+
+int odb_source_loose_for_each_object(struct odb_source *source,
-+ struct object_info *oi,
++ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
-+ .oi = oi,
++ .request = request,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
@@ object-file.h: int for_each_loose_object(struct object_database *odb,
+ * `odb_source_loose_read_object_info()` on the object.
+ */
+int odb_source_loose_for_each_object(struct odb_source *source,
-+ struct object_info *oi,
++ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
5: ad0a28e2bb = 5: 03fe7d5b3b packfile: extract function to iterate through objects of a store
6: e87126ddee ! 6: 4648a18a9b packfile: introduce function to iterate through objects
@@ Commit message
packfile: introduce function to iterate through objects
Introduce a new function `packfile_store_for_each_object()`. This
- function is the equivalent to `odb_source_loose_for_each_object()` in
+ function is equivalent to `odb_source_loose_for_each_object()`, except
that it:
- - Works on a single packfile store and thus per object source.
+ - Works on a single packfile store instead of working on the object
+ database level. Consequently, it will only yield packed objects of a
+ single object database source.
- Passes a `struct object_info` to the callback function.
@@ packfile.c: int for_each_packed_object(struct repository *repo, each_packed_obje
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
-+ struct object_info *oi;
++ const struct object_info *request;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
@@ packfile.c: int for_each_packed_object(struct repository *repo, each_packed_obje
+{
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+
-+ if (data->oi) {
++ if (data->request) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
++ struct object_info oi = *data->request;
+
-+ if (packed_object_info(pack, offset, data->oi) < 0) {
++ if (packed_object_info(pack, offset, &oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
-+ }
+
-+ return data->cb(oid, data->oi, data->cb_data);
++ return data->cb(oid, &oi, data->cb_data);
++ } else {
++ return data->cb(oid, NULL, data->cb_data);
++ }
+}
+
+int packfile_store_for_each_object(struct packfile_store *store,
-+ struct object_info *oi,
++ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
-+ .oi = oi,
++ .request = request,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
@@ packfile.h: int for_each_object_in_pack(struct packed_git *p,
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
-+ * the callback function for each of them. If given, the object info will be
-+ * populated with the object's data as if you had called
-+ * `packfile_store_read_object_info()` on the object.
++ * the callback function for each of them. If an object info request is given,
++ * then the object info will be read for every individual object and passed to
++ * the callback as if `packfile_store_read_object_info()` was called for the
++ * object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
-+ struct object_info *oi,
++ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
7: f437198d7a ! 7: 3ec85ee10f odb: introduce `odb_for_each_object()`
@@ Commit message
Signed-off-by: Patrick Steinhardt <ps@pks.im>
+ ## object-file.h ##
+@@ object-file.h: int for_each_loose_object(struct object_database *odb,
+
+ /*
+ * Iterate through all loose objects in the given object database source and
+- * invoke the callback function for each of them. If given, the object info
+- * will be populated with the object's data as if you had called
+- * `odb_source_loose_read_object_info()` on the object.
++ * invoke the callback function for each of them. If an object info request is
++ * given, then the object info will be read for every individual object and
++ * passed to the callback as if `odb_source_loose_read_object_info()` was
++ * called for the object.
+ */
+ int odb_source_loose_for_each_object(struct odb_source *source,
+ const struct object_info *request,
+
## odb.c ##
@@ odb.c: int odb_freshen_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
-+ struct object_info *oi,
++ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
@@ odb.c: int odb_freshen_object(struct object_database *odb,
+ continue;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
-+ ret = odb_source_loose_for_each_object(source, oi, cb, cb_data, flags);
++ ret = odb_source_loose_for_each_object(source, request,
++ cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
-+ ret = packfile_store_for_each_object(source->packfiles, oi, cb, cb_data, flags);
++ ret = packfile_store_for_each_object(source->packfiles, request,
++ cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
@@ odb.h: typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
++ * If an object info request is given, then the object info will be read and
++ * passed to the callback as if `odb_read_object_info()` was called for the
++ * object.
+ *
+ * Returning a non-zero error code from the callback function will cause
+ * iteration to abort. The error code will be propagated.
@@ odb.h: typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
-+ struct object_info *oi,
++ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
8: 75c0e7fb54 ! 8: 069bcb600b builtin/fsck: refactor to use `odb_for_each_object()`
@@ Commit message
Refactor these callsites accordingly.
+ Note that `odb_for_each_object()` may iterate over the same object
+ multiple times, for example when it exists both in packed and loose
+ format. But this has already been the case beforehand, so this does not
+ result in a change in behaviour.
+
Signed-off-by: Patrick Steinhardt <ps@pks.im>
## builtin/fsck.c ##
@@ builtin/fsck.c: static int mark_used(struct object *obj, enum object_type type U
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
-+ struct object_info *io UNUSED,
++ struct object_info *oi UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
9: 5a1c71af5f = 9: cb472da9d5 treewide: enumerate promisor objects via `odb_for_each_object()`
10: b6dcd01b19 ! 10: 505243613c treewide: drop uses of `for_each_{loose,packed}_object()`
@@ builtin/cat-file.c: static void batch_each_object(struct batch_options *opt,
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
++ /*
++ * TODO: we still need to tap into implementation details of the object
++ * database sources. Ideally, we should extend `odb_for_each_object()`
++ * to handle object filters itself so that we can move the filtering
++ * logic into the individual sources.
++ */
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
11: 92a8225bca ! 11: 3dc547bb9d odb: introduce mtime fields for object info requests
@@ Commit message
to request an object's mtime. This new field will be used in a
subsequent commit.
+ Note that the concept of "mtime" is ambiguous: given an object, it may
+ be stored multiple times in the object database, and each of these
+ instances may have a different mtime. Disambiguating these mtimes is
+ nothing that can happen on the generic ODB layer: the caller may search
+ for the oldest object, the newest object, or even the relation of object
+ mtimes depending on the specific source they are located in. As such, it
+ is the responsibility of the caller to disambiguate mtimes.
+
+ A consequence of this is that it's most likely incorrect to look up the
+ mtime via `odb_read_object_info()`, as this interface does not give us
+ enough information to disambiguate the mtime. Document this accordingly
+ and tell users to use `odb_for_each_object()` instead.
+
+ Even with this gotcha though it's sensible to have this request as part
+ of the object info, as the mtime is a property of the object storage
+ format. If we for example had a "black-box" storage backend, we'd still
+ need to be able to query it for the mtime info in a generic way.
+
+ We could introduce a safety mechanism that for example calls `BUG()` in
+ case we look up the mtime outside of `odb_for_each_object()`. But that
+ feels somewhat heavy-handed.
+
Signed-off-by: Patrick Steinhardt <ps@pks.im>
## object-file.c ##
@@ odb.c: static int do_oid_object_info_extended(struct object_database *odb,
## odb.h ##
@@ odb.h: struct object_info {
- off_t *disk_sizep;
struct object_id *delta_base_oid;
void **contentp;
-+ time_t *mtimep;
++ /*
++ * The time the given looked-up object has been last modified.
++ *
++ * Note: the mtime may be ambiguous in case the object exists multiple
++ * times in the object database. It is thus _not_ recommended to use
++ * this field outside of contexts where you would read every instance
++ * of the object, like for example with `odb_for_each_object()`. As it
++ * is impossible to say at the ODB level what the intent of the caller
++ * is (e.g. whether to find the oldest or newest object), it is the
++ * responsibility of the caller to disambiguate the mtimes.
++ */
++ time_t *mtimep;
++
/* Response */
enum {
+ OI_CACHED,
## packfile.c ##
@@ packfile.c: static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
@@ packfile.c: int packed_object_info(struct packed_git *p,
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
-+ die(_("could not load cruft pack .mtimes"));
++ die(_("could not load .mtimes for cruft pack '%s'"),
++ pack_basename(p));
+
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
@@ packfile.c: int packed_object_info(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ packfile.c: static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
- if (data->oi) {
off_t offset = nth_packed_object_offset(pack, index_pos);
+ struct object_info oi = *data->request;
-- if (packed_object_info(pack, offset, data->oi) < 0) {
+- if (packed_object_info(pack, offset, &oi) < 0) {
+ if (packed_object_info_with_index_pos(pack, offset,
-+ &index_pos, data->oi) < 0) {
++ &index_pos, &oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
12: 658cbf8f12 ! 12: 0047a40d16 builtin/pack-objects: use `packfile_store_for_each_object()`
@@ builtin/pack-objects.c: static int add_object_in_unpacked_pack(const struct obje
+ if (packfile_store_for_each_object(source->packfiles, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
++ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
13: a28907a4b6 = 13: c3bde2e822 reachable: convert to use `odb_for_each_object()`
14: 7d235b6529 ! 14: bf2f3c39a6 odb: drop unused `for_each_{loose,packed}_object()` functions
@@ object-file.c: int for_each_loose_file_in_source(struct odb_source *source,
-
struct for_each_object_wrapper_data {
struct odb_source *source;
- struct object_info *oi;
+ const struct object_info *request;
## object-file.h ##
@@ object-file.h: int for_each_loose_file_in_source(struct odb_source *source,
@@ object-file.h: int for_each_loose_file_in_source(struct odb_source *source,
-
/*
* Iterate through all loose objects in the given object database source and
- * invoke the callback function for each of them. If given, the object info
+ * invoke the callback function for each of them. If an object info request is
## packfile.c ##
@@ packfile.c: int for_each_object_in_pack(struct packed_git *p,
@@ packfile.c: int for_each_object_in_pack(struct packed_git *p,
-
struct packfile_store_for_each_object_wrapper_data {
struct packfile_store *store;
- struct object_info *oi;
+ const struct object_info *request;
@@ packfile.c: int packfile_store_for_each_object(struct packfile_store *store,
.cb = cb,
.cb_data = cb_data,
---
base-commit: 1ff0e42d332523a11cc3d61b8d8463db5f9f14e8
change-id: 20260115-pks-odb-for-each-object-60b78cde09fd
^ permalink raw reply [flat|nested] 120+ messages in thread
* [PATCH v4 01/14] odb: rename `FOR_EACH_OBJECT_*` flags
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
` (13 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Rename the `FOR_EACH_OBJECT_*` flags to have an `ODB_` prefix. This
prepares us for a new upcoming `odb_for_each_object()` function and
ensures that both the function and its flags have the same prefix.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 2 +-
builtin/pack-objects.c | 10 +++++-----
commit-graph.c | 4 ++--
object-file.c | 4 ++--
object-file.h | 2 +-
odb.h | 13 +++++++------
packfile.c | 20 ++++++++++----------
packfile.h | 4 ++--
reachable.c | 8 ++++----
repack-promisor.c | 2 +-
revision.c | 2 +-
11 files changed, 36 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 2ad712e9f8..6964a5a52c 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -922,7 +922,7 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
batch_each_object(opt, batch_unordered_object,
- FOR_EACH_OBJECT_PACK_ORDER, &cb);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6ee31d48c9..74317051fd 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
strbuf_release(&buf);
@@ -4344,10 +4344,10 @@ static void add_objects_in_unpacked_packs(void)
if (for_each_packed_object(to_pack.repo,
add_object_in_unpacked_pack,
NULL,
- FOR_EACH_OBJECT_PACK_ORDER |
- FOR_EACH_OBJECT_LOCAL_ONLY |
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
die(_("cannot open pack index"));
}
diff --git a/commit-graph.c b/commit-graph.c
index 6b1f02e179..7f1145a082 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1927,7 +1927,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
goto cleanup;
}
for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1965,7 +1965,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
for_each_packed_object(ctx->r, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
diff --git a/object-file.c b/object-file.c
index e7e4c3348f..64e9e239dc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1789,7 +1789,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
struct odb_source *source;
@@ -1800,7 +1800,7 @@ int for_each_loose_object(struct object_database *odb,
if (r)
return r;
- if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
break;
}
diff --git a/object-file.h b/object-file.h
index 1229d5f675..42bb50e10c 100644
--- a/object-file.h
+++ b/object-file.h
@@ -134,7 +134,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
*/
int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
/**
diff --git a/odb.h b/odb.h
index bab07755f4..74503addf1 100644
--- a/odb.h
+++ b/odb.h
@@ -442,24 +442,25 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for for_each_*_object(). */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
enum {
diff --git a/packfile.c b/packfile.c
index 402c3b5dc7..b65f0b43f1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,12 +2259,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ enum odb_for_each_object_flags flags)
{
uint32_t i;
int r = 0;
- if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) {
if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2285,7 +2285,7 @@ int for_each_object_in_pack(struct packed_git *p,
* - in pack-order, it is pack position, which we must
* convert to an index position in order to get the oid.
*/
- if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER)
index_pos = pack_pos_to_index(p, i);
else
index_pos = i;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags)
+ void *data, enum odb_for_each_object_flags flags)
{
struct odb_source *source;
int r = 0;
@@ -2318,15 +2318,15 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
struct packed_git *p = e->pack;
- if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
@@ -2413,8 +2413,8 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (repo_has_promisor_remote(r)) {
for_each_packed_object(r, add_promisor_object,
&promisor_objects,
- FOR_EACH_OBJECT_PROMISOR_ONLY |
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/packfile.h b/packfile.h
index acc5c55ad5..15551258bd 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum for_each_object_flags flags);
+ enum odb_for_each_object_flags flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags);
+ void *data, enum odb_for_each_object_flags flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
diff --git a/reachable.c b/reachable.c
index 4b532039d5..82676b2668 100644
--- a/reachable.c
+++ b/reachable.c
@@ -307,7 +307,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum for_each_object_flags flags;
+ enum odb_for_each_object_flags flags;
int r;
data.revs = revs;
@@ -319,13 +319,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
data.extra_recent_oids_loaded = 0;
r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- FOR_EACH_OBJECT_LOCAL_ONLY);
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
if (r)
goto done;
- flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
+ flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
- flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
+ flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
diff --git a/repack-promisor.c b/repack-promisor.c
index ee6e0669f6..45c330b9a5 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -56,7 +56,7 @@ void repack_promisor_objects(struct repository *repo,
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
for_each_packed_object(repo, write_oid, &ctx,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index b65a763770..5aadf46dac 100644
--- a/revision.c
+++ b/revision.c
@@ -3938,7 +3938,7 @@ int prepare_revision_walk(struct rev_info *revs)
if (revs->exclude_promisor_objects) {
for_each_packed_object(revs->repo, mark_uninteresting, revs,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
}
if (!revs->reflog_info)
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 02/14] odb: fix flags parameter to be unsigned
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 03/14] object-file: extract function to read object info from path Patrick Steinhardt
` (12 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
The `flags` parameter accepted by various `for_each_object()` functions
is a bitfield of multiple flags. Such parameters are typically unsigned
in the Git codebase, but we use `enum odb_for_each_object_flags` in
some places.
Adapt these function signatures to use the correct type.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 3 ++-
object-file.h | 3 ++-
packfile.c | 4 ++--
packfile.h | 4 ++--
4 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/object-file.c b/object-file.c
index 64e9e239dc..8fa461dd59 100644
--- a/object-file.c
+++ b/object-file.c
@@ -414,7 +414,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags)
+ struct object_info *oi,
+ unsigned flags)
{
int ret;
int fd;
diff --git a/object-file.h b/object-file.h
index 42bb50e10c..2acf19fb91 100644
--- a/object-file.h
+++ b/object-file.h
@@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags);
+ struct object_info *oi,
+ unsigned flags);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
diff --git a/packfile.c b/packfile.c
index b65f0b43f1..79fe64a25b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2259,7 +2259,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
+ unsigned flags)
{
uint32_t i;
int r = 0;
@@ -2302,7 +2302,7 @@ int for_each_object_in_pack(struct packed_git *p,
}
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags)
+ void *data, unsigned flags)
{
struct odb_source *source;
int r = 0;
diff --git a/packfile.h b/packfile.h
index 15551258bd..447c44c4a7 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,9 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum odb_for_each_object_flags flags);
+ unsigned flags);
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum odb_for_each_object_flags flags);
+ void *data, unsigned flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 03/14] object-file: extract function to read object info from path
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 01/14] odb: rename `FOR_EACH_OBJECT_*` flags Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 02/14] odb: fix flags parameter to be unsigned Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
` (11 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Extract a new function that allows us to read object info for a specific
loose object via a user-supplied path. This function will be used in a
subsequent commit.
Note that this also allows us to drop `stat_loose_object()`, which is
a simple wrapper around `odb_loose_path()` plus lstat(3p).
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 39 ++++++++++++++++-----------------------
1 file changed, 16 insertions(+), 23 deletions(-)
diff --git a/object-file.c b/object-file.c
index 8fa461dd59..a651129426 100644
--- a/object-file.c
+++ b/object-file.c
@@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
}
/*
- * Find "oid" as a loose object in given source.
- * Returns 0 on success, negative on failure.
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to stat_loose_object().
*/
-static int stat_loose_object(struct odb_source_loose *loose,
- const struct object_id *oid,
- struct stat *st, const char **path)
-{
- static struct strbuf buf = STRBUF_INIT;
-
- *path = odb_loose_path(loose->source, &buf, oid);
- if (!lstat(*path, st))
- return 0;
-
- return -1;
-}
-
-/*
- * Like stat_loose_object(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
@@ -412,7 +395,8 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
-int odb_source_loose_read_object_info(struct odb_source *source,
+static int read_object_info_from_path(struct odb_source *source,
+ const char *path,
const struct object_id *oid,
struct object_info *oi,
unsigned flags)
@@ -420,7 +404,6 @@ int odb_source_loose_read_object_info(struct odb_source *source,
int ret;
int fd;
unsigned long mapsize;
- const char *path;
void *map = NULL;
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
@@ -443,7 +426,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
+ if (lstat(path, &st) < 0) {
ret = -1;
goto out;
}
@@ -455,7 +438,7 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- fd = open_loose_object(source->loose, oid, &path);
+ fd = git_open(path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
@@ -534,6 +517,16 @@ int odb_source_loose_read_object_info(struct odb_source *source,
return ret;
}
+int odb_source_loose_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ odb_loose_path(source, &buf, oid);
+ return read_object_info_from_path(source, buf.buf, oid, oi, flags);
+}
+
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 04/14] object-file: introduce function to iterate through objects
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (2 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 03/14] object-file: extract function to read object info from path Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
` (10 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have multiple divergent interfaces to iterate through objects of a
specific backend:
- `for_each_loose_object()` yields all loose objects.
- `for_each_packed_object()` (somewhat obviously) yields all packed
objects.
These functions have different function signatures, which makes it hard
to create a common abstraction layer that covers both of these.
Introduce a new function `odb_source_loose_for_each_object()` to plug
this gap. This function doesn't take any data specific to loose objects,
but instead it accepts a `struct object_info` that will be populated
exactly as if `odb_source_loose_read_object_info()` was called.
The benefit of this new interface is that we can continue to pass
backend-specific data, as `struct object_info` contains a union for
these exact use cases. This will allow us to unify how we iterate
through objects across both loose and packed objects in a subsequent
commit.
The `for_each_loose_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
object-file.h | 11 +++++++++++
odb.h | 12 ++++++++++++
3 files changed, 71 insertions(+)
diff --git a/object-file.c b/object-file.c
index a651129426..ef2c7618c1 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1801,6 +1801,54 @@ int for_each_loose_object(struct object_database *odb,
return 0;
}
+struct for_each_object_wrapper_data {
+ struct odb_source *source;
+ const struct object_info *request;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+ const char *path,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
+
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0)
+ return -1;
+
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
+ }
+}
+
+int odb_source_loose_for_each_object(struct odb_source *source,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
+ .request = request,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+
+ /* There are no loose promisor objects, so we can return immediately. */
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+ return 0;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+ return 0;
+
+ return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+ NULL, NULL, &data);
+}
+
static int append_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
diff --git a/object-file.h b/object-file.h
index 2acf19fb91..5b9641cd89 100644
--- a/object-file.h
+++ b/object-file.h
@@ -137,6 +137,17 @@ int for_each_loose_object(struct object_database *odb,
each_loose_object_fn, void *,
enum odb_for_each_object_flags flags);
+/*
+ * Iterate through all loose objects in the given object database source and
+ * invoke the callback function for each of them. If given, the object info
+ * will be populated with the object's data as if you had called
+ * `odb_source_loose_read_object_info()` on the object.
+ */
+int odb_source_loose_for_each_object(struct odb_source *source,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/**
* format_object_header() is a thin wrapper around s xsnprintf() that
diff --git a/odb.h b/odb.h
index 74503addf1..f97f249580 100644
--- a/odb.h
+++ b/odb.h
@@ -463,6 +463,18 @@ enum odb_for_each_object_flags {
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated the same as if you would call
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 05/14] packfile: extract function to iterate through objects of a store
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (3 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 04/14] object-file: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
` (9 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
In the next commit we're about to introduce a new function that knows how to
iterate through objects of a given packfile store. Same as with the
equivalent function for loose objects, this new function will also be
agnostic of backends by using a `struct object_info`.
Prepare for this by extracting a new shared function to iterate through
a single packfile store.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 78 ++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 45 insertions(+), 33 deletions(-)
diff --git a/packfile.c b/packfile.c
index 79fe64a25b..d15a2ce12b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2301,51 +2301,63 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
+static int packfile_store_for_each_object_internal(struct packfile_store *store,
+ each_packed_object_fn cb,
+ void *data,
+ unsigned flags,
+ int *pack_errors)
{
- struct odb_source *source;
- int r = 0;
- int pack_errors = 0;
+ struct packfile_list_entry *e;
+ int ret = 0;
- odb_prepare_alternates(repo->objects);
+ store->skip_mru_updates = true;
- for (source = repo->objects->sources; source; source = source->next) {
- struct packfile_list_entry *e;
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
- source->packfiles->skip_mru_updates = true;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ *pack_errors = 1;
+ continue;
+ }
- for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
- struct packed_git *p = e->pack;
+ ret = for_each_object_in_pack(p, cb, data, flags);
+ if (ret)
+ break;
+ }
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- pack_errors = 1;
- continue;
- }
+ store->skip_mru_updates = false;
- r = for_each_object_in_pack(p, cb, data, flags);
- if (r)
- break;
- }
+ return ret;
+}
- source->packfiles->skip_mru_updates = false;
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, unsigned flags)
+{
+ struct odb_source *source;
+ int pack_errors = 0;
+ int ret = 0;
- if (r)
+ odb_prepare_alternates(repo->objects);
+
+ for (source = repo->objects->sources; source; source = source->next) {
+ ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
+ flags, &pack_errors);
+ if (ret)
break;
}
- return r ? r : pack_errors;
+ return ret ? ret : pack_errors;
}
static int add_promisor_object(const struct object_id *oid,
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 06/14] packfile: introduce function to iterate through objects
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (4 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 05/14] packfile: extract function to iterate through objects of a store Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
` (8 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Introduce a new function `packfile_store_for_each_object()`. This
function is equivalent to `for_each_packed_object()`, except
that it:
- Works on a single packfile store instead of working on the object
database level. Consequently, it will only yield packed objects of a
single object database source.
- Passes a `struct object_info` to the callback function.
As such, it provides the same callback interface as we already provide
for loose objects now. These functions will be used in a subsequent step
to implement `odb_for_each_object()`.
The `for_each_packed_object()` function continues to exist for now, but
it will be removed at the end of this patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
packfile.h | 15 +++++++++++++++
2 files changed, 66 insertions(+)
diff --git a/packfile.c b/packfile.c
index d15a2ce12b..c35d5ea655 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2360,6 +2360,57 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
return ret ? ret : pack_errors;
}
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
+ const struct object_info *request;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t index_pos,
+ void *cb_data)
+{
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+
+ if (data->request) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
+ struct object_info oi = *data->request;
+
+ if (packed_object_info(pack, offset, &oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
+
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
+ }
+}
+
+int packfile_store_for_each_object(struct packfile_store *store,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
+ .request = request,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+ int pack_errors = 0, ret;
+
+ ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
+ &data, flags, &pack_errors);
+ if (ret)
+ return ret;
+
+ return pack_errors ? -1 : 0;
+}
+
static int add_promisor_object(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos UNUSED,
diff --git a/packfile.h b/packfile.h
index 447c44c4a7..b7964f0289 100644
--- a/packfile.h
+++ b/packfile.h
@@ -343,6 +343,21 @@ int for_each_object_in_pack(struct packed_git *p,
int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
void *data, unsigned flags);
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
+ * the callback function for each of them. If an object info request is given,
+ * then the object info will be read for every individual object and passed to
+ * the callback as if `packfile_store_read_object_info()` was called for the
+ * object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
#define PACKDIR_FILE_IDX 2
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 07/14] odb: introduce `odb_for_each_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (5 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 06/14] packfile: introduce function to iterate through objects Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
` (7 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
Introduce a new function `odb_for_each_object()` that knows how to iterate
through all objects that are part of a given object database. This function is
essentially a simple wrapper around the object database sources.
Subsequent commits will adapt callers to use this new function.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.h | 7 ++++---
odb.c | 29 +++++++++++++++++++++++++++++
odb.h | 20 ++++++++++++++++++++
3 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/object-file.h b/object-file.h
index 5b9641cd89..b5eac0349e 100644
--- a/object-file.h
+++ b/object-file.h
@@ -139,9 +139,10 @@ int for_each_loose_object(struct object_database *odb,
/*
* Iterate through all loose objects in the given object database source and
- * invoke the callback function for each of them. If given, the object info
- * will be populated with the object's data as if you had called
- * `odb_source_loose_read_object_info()` on the object.
+ * invoke the callback function for each of them. If an object info request is
+ * given, then the object info will be read for every individual object and
+ * passed to the callback as if `odb_source_loose_read_object_info()` was
+ * called for the object.
*/
int odb_source_loose_for_each_object(struct odb_source *source,
const struct object_info *request,
diff --git a/odb.c b/odb.c
index ac70b6a099..13a415c2c3 100644
--- a/odb.c
+++ b/odb.c
@@ -995,6 +995,35 @@ int odb_freshen_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ int ret;
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ continue;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, request,
+ cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ ret = packfile_store_for_each_object(source->packfiles, request,
+ cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
diff --git a/odb.h b/odb.h
index f97f249580..b5d28bc188 100644
--- a/odb.h
+++ b/odb.h
@@ -475,6 +475,26 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
struct object_info *oi,
void *cb_data);
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ * If an object info request is given, then the object info will be read and
+ * passed to the callback as if `odb_read_object_info()` was called for the
+ * object.
+ *
+ * Returning a non-zero error code from the callback function will cause
+ * iteration to abort. The error code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 08/14] builtin/fsck: refactor to use `odb_for_each_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (6 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 07/14] odb: introduce `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
` (6 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
In git-fsck(1) we have two callsites where we iterate over all objects
via `for_each_loose_object()` and `for_each_packed_object()`. Both of
these are trivially convertible to `odb_for_each_object()`.
Refactor these callsites accordingly.
Note that `odb_for_each_object()` may iterate over the same object
multiple times, for example when it exists both in packed and loose
format. But this has already been the case beforehand, so this does not
result in a change in behaviour.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/fsck.c | 57 ++++++++++++---------------------------------------------
1 file changed, 12 insertions(+), 45 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 4979bc795e..2ebe77d58e 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -218,15 +218,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
return 0;
}
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
- return; /* not part of our original set */
+ return 0; /* not part of our original set */
if (obj->flags & REACHABLE)
- return; /* reachable objects already traversed */
+ return 0; /* reachable objects already traversed */
/*
* Avoid passing OBJ_NONE to fsck_walk, which will parse the object
@@ -243,22 +245,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
-}
-static int mark_loose_unreachable_referents(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
- return 0;
-}
-
-static int mark_packed_unreachable_referents(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
return 0;
}
@@ -394,12 +381,8 @@ static void check_connectivity(void)
* and ignore any that weren't present in our earlier
* traversal.
*/
- for_each_loose_object(the_repository->objects,
- mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_unreachable_referents,
- NULL,
- 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_unreachable_referents, NULL, 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -848,26 +831,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
fsck_resolve_undo(istate, index_path);
}
-static void mark_object_for_connectivity(const struct object_id *oid)
+static int mark_object_for_connectivity(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data UNUSED)
{
struct object *obj = lookup_unknown_object(the_repository, oid);
obj->flags |= HAS_OBJ;
-}
-
-static int mark_loose_for_connectivity(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
- return 0;
-}
-
-static int mark_packed_for_connectivity(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
return 0;
}
@@ -1001,10 +970,8 @@ int cmd_fsck(int argc,
fsck_refs(the_repository);
if (connectivity_only) {
- for_each_loose_object(the_repository->objects,
- mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_for_connectivity, NULL, 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_object_for_connectivity, NULL, 0);
} else {
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next)
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread
* [PATCH v4 09/14] treewide: enumerate promisor objects via `odb_for_each_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (7 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 08/14] builtin/fsck: refactor to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
` (5 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have multiple callsites where we enumerate all promisor objects in
the object database via `for_each_packed_object()`. This is done by
passing the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY` flag, which causes us to
skip over all non-promisor objects.
These callsites can be trivially converted to `odb_for_each_object()` as
we know to skip enumeration of loose objects in case the `PROMISOR_ONLY`
flag was passed by the caller.
Refactor the sites accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
packfile.c | 37 ++++++++++++++++++++++---------------
repack-promisor.c | 8 ++++----
revision.c | 10 ++++------
3 files changed, 30 insertions(+), 25 deletions(-)
diff --git a/packfile.c b/packfile.c
index c35d5ea655..c54deabd64 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2411,28 +2411,32 @@ int packfile_store_for_each_object(struct packfile_store *store,
return pack_errors ? -1 : 0;
}
+struct add_promisor_object_data {
+ struct repository *repo;
+ struct oidset *set;
+};
+
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos UNUSED,
- void *set_)
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
- struct oidset *set = set_;
+ struct add_promisor_object_data *data = cb_data;
struct object *obj;
int we_parsed_object;
- obj = lookup_object(pack->repo, oid);
+ obj = lookup_object(data->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object_with_flags(pack->repo, oid,
+ obj = parse_object_with_flags(data->repo, oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
}
if (!obj)
return 1;
- oidset_insert(set, oid);
+ oidset_insert(data->set, oid);
/*
* If this is a tree, commit, or tag, the objects it refers
@@ -2450,19 +2454,19 @@ static int add_promisor_object(const struct object_id *oid,
*/
return 0;
while (tree_entry_gently(&desc, &entry))
- oidset_insert(set, &entry.oid);
+ oidset_insert(data->set, &entry.oid);
if (we_parsed_object)
free_tree_buffer(tree);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, get_commit_tree_oid(commit));
+ oidset_insert(data->set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
- oidset_insert(set, &parents->item->object.oid);
+ oidset_insert(data->set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
- oidset_insert(set, get_tagged_oid(tag));
+ oidset_insert(data->set, get_tagged_oid(tag));
}
return 0;
}
@@ -2474,10 +2478,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(r)) {
- for_each_packed_object(r, add_promisor_object,
- &promisor_objects,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY |
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+ struct add_promisor_object_data data = {
+ .repo = r,
+ .set = &promisor_objects,
+ };
+
+ odb_for_each_object(r->objects, NULL, add_promisor_object, &data,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/repack-promisor.c b/repack-promisor.c
index 45c330b9a5..35c4073632 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -17,8 +17,8 @@ struct write_oid_context {
* necessary.
*/
static int write_oid(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED, void *data)
+ struct object_info *oi UNUSED,
+ void *data)
{
struct write_oid_context *ctx = data;
struct child_process *cmd = ctx->cmd;
@@ -55,8 +55,8 @@ void repack_promisor_objects(struct repository *repo,
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
- for_each_packed_object(repo, write_oid, &ctx,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
+ odb_for_each_object(repo->objects, NULL, write_oid, &ctx,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index 5aadf46dac..e34bcd8e88 100644
--- a/revision.c
+++ b/revision.c
@@ -3626,8 +3626,7 @@ void reset_revision_walk(void)
}
static int mark_uninteresting(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
+ struct object_info *oi UNUSED,
void *cb)
{
struct rev_info *revs = cb;
@@ -3936,10 +3935,9 @@ int prepare_revision_walk(struct rev_info *revs)
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
- if (revs->exclude_promisor_objects) {
- for_each_packed_object(revs->repo, mark_uninteresting, revs,
- ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
- }
+ if (revs->exclude_promisor_objects)
+ odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting,
+ revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v4 10/14] treewide: drop uses of `for_each_{loose,packed}_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (8 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 09/14] treewide: enumerate promisor objects via `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
` (4 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We're using `for_each_loose_object()` and `for_each_packed_object()` at
a couple of callsites to enumerate all loose and packed objects,
respectively. These functions will be removed in a subsequent commit in
favor of the newly introduced `odb_source_loose_for_each_object()` and
`packfile_store_for_each_object()` replacements.
Prepare for this by refactoring the sites accordingly.
Note that ideally, we'd convert all callsites to use the generic
`odb_for_each_object()` function already. But for some callers this is
not possible (yet), and it would require some significant refactorings
to make this work. Converting these sites will thus be deferred to a
later patch series.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/cat-file.c | 34 ++++++++++++++++++++++++++++------
commit-graph.c | 44 +++++++++++++++++++++++++++++++-------------
2 files changed, 59 insertions(+), 19 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 6964a5a52c..e2c63dbedf 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -806,11 +806,14 @@ struct for_each_object_payload {
void *payload;
};
-static int batch_one_object_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *_payload)
+static int batch_one_object_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *_payload)
{
struct for_each_object_payload *payload = _payload;
+ if (oi && oi->whence == OI_PACKED)
+ return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
+ payload->payload);
return payload->callback(oid, NULL, 0, payload->payload);
}
@@ -846,8 +849,21 @@ static void batch_each_object(struct batch_options *opt,
.payload = _payload,
};
struct bitmap_index *bitmap = prepare_bitmap_git(the_repository);
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
+ /*
+ * TODO: we still need to tap into implementation details of the object
+ * database sources. Ideally, we should extend `odb_for_each_object()`
+ * to handle object filters itself so that we can move the filtering
+ * logic into the individual sources.
+ */
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
+ &payload, flags);
+ if (ret)
+ break;
+ }
if (bitmap && !for_each_bitmapped_object(bitmap, &opt->objects_filter,
batch_one_object_bitmapped, &payload)) {
@@ -861,8 +877,14 @@ static void batch_each_object(struct batch_options *opt,
&payload, flags);
}
} else {
- for_each_packed_object(the_repository, batch_one_object_packed,
- &payload, flags);
+ struct object_info oi = { 0 };
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = packfile_store_for_each_object(source->packfiles, &oi,
+ batch_one_object_oi, &payload, flags);
+ if (ret)
+ break;
+ }
}
free_bitmap_index(bitmap);
diff --git a/commit-graph.c b/commit-graph.c
index 7f1145a082..a3087d7883 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
+static int add_packed_commits_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *data)
+{
+ struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
+
+ if (ctx->progress)
+ display_progress(ctx->progress, ++ctx->progress_done);
+
+ if (*oi->typep != OBJ_COMMIT)
+ return 0;
+
+ oid_array_append(&ctx->oids, oid);
+ set_commit_pos(ctx->r, oid);
+
+ return 0;
+}
+
static int add_packed_commits(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
- struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
enum object_type type;
off_t offset = nth_packed_object_offset(pack, pos);
struct object_info oi = OBJECT_INFO_INIT;
- if (ctx->progress)
- display_progress(ctx->progress, ++ctx->progress_done);
-
oi.typep = &type;
if (packed_object_info(pack, offset, &oi) < 0)
die(_("unable to get type of object %s"), oid_to_hex(oid));
- if (type != OBJ_COMMIT)
- return 0;
-
- oid_array_append(&ctx->oids, oid);
- set_commit_pos(ctx->r, oid);
-
- return 0;
+ return add_packed_commits_oi(oid, &oi, data);
}
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
@@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
+ struct odb_source *source;
+ enum object_type type;
+ struct object_info oi = {
+ .typep = &type,
+ };
+
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
ctx->r,
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(ctx->r, add_packed_commits, ctx,
- ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
+ odb_prepare_alternates(ctx->r->objects);
+ for (source = ctx->r->objects->sources; source; source = source->next)
+ packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi,
+ ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v4 11/14] odb: introduce mtime fields for object info requests
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (9 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 10/14] treewide: drop uses of `for_each_{loose,packed}_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
` (3 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
There are some use cases where we need to figure out the mtime for
objects. Most importantly, this is the case when we want to prune
unreachable objects. But getting at that data requires users to manually
derive the info either via the loose object's mtime, the packfiles'
mtime or via the ".mtimes" file.
Introduce a new `struct object_info::mtimep` pointer that allows callers
to request an object's mtime. This new field will be used in a
subsequent commit.
Note that the concept of "mtime" is ambiguous: given an object, it may
be stored multiple times in the object database, and each of these
instances may have a different mtime. Disambiguating these mtimes is
not something that can happen on the generic ODB layer: the caller may search
for the oldest object, the newest object, or even the relation of object
mtimes depending on the specific source they are located in. As such, it
is the responsibility of the caller to disambiguate mtimes.
A consequence of this is that it's most likely incorrect to look up the
mtime via `odb_read_object_info()`, as this interface does not give us
enough information to disambiguate the mtime. Document this accordingly
and tell users to use `odb_for_each_object()` instead.
Even with this gotcha though it's sensible to have this request as part
of the object info, as the mtime is a property of the object storage
format. If we for example had a "black-box" storage backend, we'd still
need to be able to query it for the mtime info in a generic way.
We could introduce a safety mechanism that for example calls `BUG()` in
case we look up the mtime outside of `odb_for_each_object()`. But that
feels somewhat heavy-handed.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 29 +++++++++++++++++++++++++----
odb.c | 2 ++
odb.h | 13 +++++++++++++
packfile.c | 41 ++++++++++++++++++++++++++++++++++-------
4 files changed, 74 insertions(+), 11 deletions(-)
diff --git a/object-file.c b/object-file.c
index ef2c7618c1..5537ab2c37 100644
--- a/object-file.c
+++ b/object-file.c
@@ -409,6 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
+ struct stat st;
/*
* If we don't care about type or size, then we don't
@@ -421,7 +422,7 @@ static int read_object_info_from_path(struct odb_source *source,
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
struct stat st;
- if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
+ if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
goto out;
}
@@ -431,8 +432,12 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- if (oi && oi->disk_sizep)
- *oi->disk_sizep = st.st_size;
+ if (oi) {
+ if (oi->disk_sizep)
+ *oi->disk_sizep = st.st_size;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
+ }
ret = 0;
goto out;
@@ -446,7 +451,21 @@ static int read_object_info_from_path(struct odb_source *source,
goto out;
}
- map = map_fd(fd, path, &mapsize);
+ if (fstat(fd, &st)) {
+ close(fd);
+ ret = -1;
+ goto out;
+ }
+
+ mapsize = xsize_t(st.st_size);
+ if (!mapsize) {
+ close(fd);
+ ret = error(_("object file %s is empty"), path);
+ goto out;
+ }
+
+ map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
if (!map) {
ret = -1;
goto out;
@@ -454,6 +473,8 @@ static int read_object_info_from_path(struct odb_source *source,
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
stream_to_end = &stream;
diff --git a/odb.c b/odb.c
index 13a415c2c3..9d9a3fad62 100644
--- a/odb.c
+++ b/odb.c
@@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
+ if (oi->mtimep)
+ *oi->mtimep = 0;
oi->whence = OI_CACHED;
}
return 0;
diff --git a/odb.h b/odb.h
index b5d28bc188..8ad0fcc02f 100644
--- a/odb.h
+++ b/odb.h
@@ -318,6 +318,19 @@ struct object_info {
struct object_id *delta_base_oid;
void **contentp;
+ /*
+ * The time the given looked-up object has been last modified.
+ *
+ * Note: the mtime may be ambiguous in case the object exists multiple
+ * times in the object database. It is thus _not_ recommended to use
+ * this field outside of contexts where you would read every instance
+ * of the object, like for example with `odb_for_each_object()`. As it
+ * is impossible to say at the ODB level what the intent of the caller
+ * is (e.g. whether to find the oldest or newest object), it is the
+ * responsibility of the caller to disambiguate the mtimes.
+ */
+ time_t *mtimep;
+
/* Response */
enum {
OI_CACHED,
diff --git a/packfile.c b/packfile.c
index c54deabd64..845633139f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
-int packed_object_info(struct packed_git *p,
- off_t obj_offset, struct object_info *oi)
+static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset,
+ uint32_t *maybe_index_pos, struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type = OBJ_NONE;
+ uint32_t pack_pos;
int ret;
/*
@@ -1619,16 +1620,35 @@ int packed_object_info(struct packed_git *p,
}
}
- if (oi->disk_sizep) {
- uint32_t pos;
- if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
ret = -1;
goto out;
}
+ }
+
+ if (oi->disk_sizep)
+ *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+
+ if (oi->mtimep) {
+ if (p->is_cruft) {
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
+ die(_("could not load .mtimes for cruft pack '%s'"),
+ pack_basename(p));
+
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
+ else
+ index_pos = pack_pos_to_index(p, pack_pos);
- *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
+ *oi->mtimep = nth_packed_mtime(p, index_pos);
+ } else {
+ *oi->mtimep = p->mtime;
+ }
}
if (oi->typep) {
@@ -1681,6 +1701,12 @@ int packed_object_info(struct packed_git *p,
return ret;
}
+int packed_object_info(struct packed_git *p, off_t obj_offset,
+ struct object_info *oi)
+{
+ return packed_object_info_with_index_pos(p, obj_offset, NULL, oi);
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2378,7 +2404,8 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
off_t offset = nth_packed_object_offset(pack, index_pos);
struct object_info oi = *data->request;
- if (packed_object_info(pack, offset, &oi) < 0) {
+ if (packed_object_info_with_index_pos(pack, offset,
+ &index_pos, &oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v4 12/14] builtin/pack-objects: use `packfile_store_for_each_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (10 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 11/14] odb: introduce mtime fields for object info requests Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
` (2 subsequent siblings)
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
When enumerating objects that are supposed to be stored in a new cruft
pack we use `for_each_packed_object()` and then derive each object's
mtime individually. Refactor this logic to instead use the new
`packfile_store_for_each_object()` function with an object info request
that asks for the respective mtimes.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/pack-objects.c | 46 ++++++++++++++++++++++------------------------
1 file changed, 22 insertions(+), 24 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 74317051fd..a6d37366ff 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4314,25 +4314,12 @@ static void show_edge(struct commit *commit)
}
static int add_object_in_unpacked_pack(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
+ struct object_info *oi,
void *data UNUSED)
{
if (cruft) {
- off_t offset;
- time_t mtime;
-
- if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(pack, pos);
- } else {
- mtime = pack->mtime;
- }
- offset = nth_packed_object_offset(pack, pos);
-
- add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
- NULL, mtime);
+ add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
+ oi->u.packed.offset, NULL, *oi->mtimep);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
@@ -4341,14 +4328,25 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(to_pack.repo,
- add_object_in_unpacked_pack,
- NULL,
- ODB_FOR_EACH_OBJECT_PACK_ORDER |
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
- ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
- die(_("cannot open pack index"));
+ struct odb_source *source;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ };
+
+ odb_prepare_alternates(to_pack.repo->objects);
+ for (source = to_pack.repo->objects->sources; source; source = source->next) {
+ if (!source->local)
+ continue;
+
+ if (packfile_store_for_each_object(source->packfiles, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
+ }
}
static int add_loose_object(const struct object_id *oid, const char *path,
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v4 13/14] reachable: convert to use `odb_for_each_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (11 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 12/14] builtin/pack-objects: use `packfile_store_for_each_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-01-26 9:51 ` [PATCH v4 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
2026-02-20 22:59 ` [PATCH v4 00/14] odb: introduce `odb_for_each_object()` Junio C Hamano
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
To figure out which objects have expired we enumerate all loose and
packed objects individually so that we can figure out their respective
mtimes. Refactor the code to instead use `odb_for_each_object()` with a
request that asks for the object mtime instead.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reachable.c | 125 +++++++++++++++++-------------------------------------------
1 file changed, 35 insertions(+), 90 deletions(-)
diff --git a/reachable.c b/reachable.c
index 82676b2668..101cfc2727 100644
--- a/reachable.c
+++ b/reachable.c
@@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
return oidset_contains(&data->extra_recent_oids, oid);
}
-static void add_recent_object(const struct object_id *oid,
- struct packed_git *pack,
- off_t offset,
- timestamp_t mtime,
- struct recent_data *data)
+static int want_recent_object(struct recent_data *data,
+ const struct object_id *oid)
{
- struct object *obj;
- enum object_type type;
+ if (data->ignore_in_core_kept_packs &&
+ has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ return 0;
+ return 1;
+}
- if (!obj_is_recent(oid, mtime, data))
- return;
+static int add_recent_object(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data)
+{
+ struct recent_data *data = cb_data;
+ struct object *obj;
- /*
- * We do not want to call parse_object here, because
- * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for
- * later processing, and the revision machinery expects
- * commits and tags to have been parsed.
- */
- type = odb_read_object_info(the_repository->objects, oid, NULL);
- if (type < 0)
- die("unable to get object info for %s", oid_to_hex(oid));
+ if (!want_recent_object(data, oid) ||
+ !obj_is_recent(oid, *oi->mtimep, data))
+ return 0;
- switch (type) {
+ switch (*oi->typep) {
case OBJ_TAG:
case OBJ_COMMIT:
obj = parse_object_or_die(the_repository, oid, NULL);
@@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid,
break;
default:
die("unknown object type for %s: %s",
- oid_to_hex(oid), type_name(type));
+ oid_to_hex(oid), type_name(*oi->typep));
}
if (!obj)
die("unable to lookup %s", oid_to_hex(oid));
-
- add_pending_object(data->revs, obj, "");
- if (data->cb)
- data->cb(obj, pack, offset, mtime);
-}
-
-static int want_recent_object(struct recent_data *data,
- const struct object_id *oid)
-{
- if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ if (obj->flags & SEEN)
return 0;
- return 1;
-}
-static int add_recent_loose(const struct object_id *oid,
- const char *path, void *data)
-{
- struct stat st;
- struct object *obj;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
-
- if (stat(path, &st) < 0) {
- /*
- * It's OK if an object went away during our iteration; this
- * could be due to a simultaneous repack. But anything else
- * we should abort, since we might then fail to mark objects
- * which should not be pruned.
- */
- if (errno == ENOENT)
- return 0;
- return error_errno("unable to stat %s", oid_to_hex(oid));
+ add_pending_object(data->revs, obj, "");
+ if (data->cb) {
+ if (oi->whence == OI_PACKED)
+ data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep);
+ else
+ data->cb(obj, NULL, 0, *oi->mtimep);
}
- add_recent_object(oid, NULL, 0, st.st_mtime, data);
- return 0;
-}
-
-static int add_recent_packed(const struct object_id *oid,
- struct packed_git *p,
- uint32_t pos,
- void *data)
-{
- struct object *obj;
- timestamp_t mtime = p->mtime;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
- if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(p, pos);
- }
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
return 0;
}
@@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum odb_for_each_object_flags flags;
+ unsigned flags;
+ enum object_type type;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ .typep = &type,
+ };
int r;
data.revs = revs;
@@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
- r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY);
- if (r)
- goto done;
-
flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
+ r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags);
+ if (r)
+ goto done;
done:
oidset_clear(&data.extra_recent_oids);
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* [PATCH v4 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (12 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 13/14] reachable: convert to use `odb_for_each_object()` Patrick Steinhardt
@ 2026-01-26 9:51 ` Patrick Steinhardt
2026-02-20 22:59 ` [PATCH v4 00/14] odb: introduce `odb_for_each_object()` Junio C Hamano
14 siblings, 0 replies; 120+ messages in thread
From: Patrick Steinhardt @ 2026-01-26 9:51 UTC (permalink / raw)
To: git; +Cc: Karthik Nayak, Justin Tobler, Junio C Hamano
We have converted all callers of `for_each_loose_object()` and
`for_each_packed_object()` to use their new replacement functions
instead. We can thus remove them now.
Do so and inline `packfile_store_for_each_object_internal()` now that it
only has a single callsite again. This makes it a bit easier to follow
the callback indirection that is happening there.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
object-file.c | 20 ------------
object-file.h | 11 -------
packfile.c | 99 +++++++++++++++++++++--------------------------------------
packfile.h | 2 --
4 files changed, 35 insertions(+), 97 deletions(-)
diff --git a/object-file.c b/object-file.c
index 5537ab2c37..6785821c8c 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1802,26 +1802,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn cb, void *data,
- enum odb_for_each_object_flags flags)
-{
- struct odb_source *source;
-
- odb_prepare_alternates(odb);
- for (source = odb->sources; source; source = source->next) {
- int r = for_each_loose_file_in_source(source, cb, NULL,
- NULL, data);
- if (r)
- return r;
-
- if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY)
- break;
- }
-
- return 0;
-}
-
struct for_each_object_wrapper_data {
struct odb_source *source;
const struct object_info *request;
diff --git a/object-file.h b/object-file.h
index b5eac0349e..d9979baea8 100644
--- a/object-file.h
+++ b/object-file.h
@@ -126,17 +126,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
each_loose_subdir_fn subdir_cb,
void *data);
-/*
- * Iterate over all accessible loose objects without respect to
- * reachability. By default, this includes both local and alternate objects.
- * The order in which objects are visited is unspecified.
- *
- * Any flags specific to packs are ignored.
- */
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn, void *,
- enum odb_for_each_object_flags flags);
-
/*
* Iterate through all loose objects in the given object database source and
* invoke the callback function for each of them. If an object info request is
diff --git a/packfile.c b/packfile.c
index 845633139f..57fbf51876 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2327,65 +2327,6 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-static int packfile_store_for_each_object_internal(struct packfile_store *store,
- each_packed_object_fn cb,
- void *data,
- unsigned flags,
- int *pack_errors)
-{
- struct packfile_list_entry *e;
- int ret = 0;
-
- store->skip_mru_updates = true;
-
- for (e = packfile_store_get_packs(store); e; e = e->next) {
- struct packed_git *p = e->pack;
-
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- *pack_errors = 1;
- continue;
- }
-
- ret = for_each_object_in_pack(p, cb, data, flags);
- if (ret)
- break;
- }
-
- store->skip_mru_updates = false;
-
- return ret;
-}
-
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags)
-{
- struct odb_source *source;
- int pack_errors = 0;
- int ret = 0;
-
- odb_prepare_alternates(repo->objects);
-
- for (source = repo->objects->sources; source; source = source->next) {
- ret = packfile_store_for_each_object_internal(source->packfiles, cb, data,
- flags, &pack_errors);
- if (ret)
- break;
- }
-
- return ret ? ret : pack_errors;
-}
-
struct packfile_store_for_each_object_wrapper_data {
struct packfile_store *store;
const struct object_info *request;
@@ -2428,14 +2369,44 @@ int packfile_store_for_each_object(struct packfile_store *store,
.cb = cb,
.cb_data = cb_data,
};
+ struct packfile_list_entry *e;
int pack_errors = 0, ret;
- ret = packfile_store_for_each_object_internal(store, packfile_store_for_each_object_wrapper,
- &data, flags, &pack_errors);
- if (ret)
- return ret;
+ store->skip_mru_updates = true;
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
+
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ pack_errors = 1;
+ continue;
+ }
+
+ ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
+ &data, flags);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
- return pack_errors ? -1 : 0;
+out:
+ store->skip_mru_updates = false;
+
+ if (!ret && pack_errors)
+ ret = -1;
+ return ret;
}
struct add_promisor_object_data {
diff --git a/packfile.h b/packfile.h
index b7964f0289..1a1b720764 100644
--- a/packfile.h
+++ b/packfile.h
@@ -340,8 +340,6 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
unsigned flags);
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, unsigned flags);
/*
* Iterate through all packed objects in the given packfile store and invoke
--
2.53.0.rc1.267.g6e3a78c723.dirty
^ permalink raw reply related [flat|nested] 120+ messages in thread* Re: [PATCH v4 00/14] odb: introduce `odb_for_each_object()`
2026-01-26 9:51 ` [PATCH v4 " Patrick Steinhardt
` (13 preceding siblings ...)
2026-01-26 9:51 ` [PATCH v4 14/14] odb: drop unused `for_each_{loose,packed}_object()` functions Patrick Steinhardt
@ 2026-02-20 22:59 ` Junio C Hamano
14 siblings, 0 replies; 120+ messages in thread
From: Junio C Hamano @ 2026-02-20 22:59 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Karthik Nayak, Justin Tobler
Patrick Steinhardt <ps@pks.im> writes:
> this patch series introduces a generic `odb_for_each_object()` function
> to iterate through objects and adapts callers to use it. The intent is
> to make iteration through objects independent of the actual storage
> backend.
This topic has been dormant for too long, but we saw quite a lot of
things changed over the course of its evolution. Perhaps we are now
at the sweet "good enough" place?
Let me mark the topic for 'next', if that is the case. Thanks.
^ permalink raw reply [flat|nested] 120+ messages in thread