Git development

Git development
 help / color / mirror / Atom feed

* [PATCH 05/16] odb/source-packed: wire up `close()` callback
From: Patrick Steinhardt @ 2026-06-04 11:25 UTC (permalink / raw)
  To: git
In-Reply-To: <20260604-pks-odb-source-packed-v1-0-2e7ab31b4b5c@pks.im>

Wire up a new `close()` callback for the packed source and call it from
the "files" source via the generic `odb_source_close()` interface. The
callback takes over the logic of `packfile_store_close()`, which is
removed.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-files.c  |  2 +-
 odb/source-packed.c | 16 ++++++++++++++++
 packfile.c          | 12 ------------
 packfile.h          |  6 ------
 4 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/odb/source-files.c b/odb/source-files.c
index 3608808e7c..9b0fa9ccdc 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -38,7 +38,7 @@ static void odb_source_files_close(struct odb_source *source)
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
 	odb_source_close(&files->loose->base);
-	packfile_store_close(files->packed);
+	odb_source_close(&files->packed->base);
 }
 
 static void odb_source_files_reprepare(struct odb_source *source)
diff --git a/odb/source-packed.c b/odb/source-packed.c
index f81a990cbd..74805be1dd 100644
--- a/odb/source-packed.c
+++ b/odb/source-packed.c
@@ -1,6 +1,7 @@
 #include "git-compat-util.h"
 #include "abspath.h"
 #include "chdir-notify.h"
+#include "midx.h"
 #include "odb/source-packed.h"
 #include "packfile.h"
 
@@ -16,6 +17,20 @@ static void odb_source_packed_reparent(const char *name UNUSED,
 	packed->base.path = path;
 }
 
+static void odb_source_packed_close(struct odb_source *source)
+{
+	struct odb_source_packed *packed = odb_source_packed_downcast(source);
+
+	for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next) {
+		if (e->pack->do_not_close)
+			BUG("want to close pack marked 'do-not-close'");
+		close_pack(e->pack);
+	}
+	if (packed->midx)
+		close_midx(packed->midx);
+	packed->midx = NULL;
+}
+
 static void odb_source_packed_free(struct odb_source *source)
 {
 	struct odb_source_packed *packed = odb_source_packed_downcast(source);
@@ -42,6 +57,7 @@ struct odb_source_packed *odb_source_packed_new(struct odb_source_files *parent)
 	strmap_init(&packed->packs_by_path);
 
 	packed->base.free = odb_source_packed_free;
+	packed->base.close = odb_source_packed_close;
 
 	if (!is_absolute_path(parent->base.path))
 		chdir_notify_register(NULL, odb_source_packed_reparent, packed);
diff --git a/packfile.c b/packfile.c
index 8fefc4358e..3cff3b8ffa 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2832,18 +2832,6 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l
 	return 0;
 }
 
-void packfile_store_close(struct odb_source_packed *store)
-{
-	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) {
-		if (e->pack->do_not_close)
-			BUG("want to close pack marked 'do-not-close'");
-		close_pack(e->pack);
-	}
-	if (store->midx)
-		close_midx(store->midx);
-	store->midx = NULL;
-}
-
 struct odb_packed_read_stream {
 	struct odb_read_stream base;
 	struct packed_git *pack;
diff --git a/packfile.h b/packfile.h
index d6814b564e..23af47f0c1 100644
--- a/packfile.h
+++ b/packfile.h
@@ -67,12 +67,6 @@ void packfile_list_append(struct packfile_list *list, struct packed_git *pack);
 struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs,
 					  const struct object_id *oid);
 
-/*
- * Close all packfiles associated with this store. The packfiles won't be
- * free'd, so they can be re-opened at a later point in time.
- */
-void packfile_store_close(struct odb_source_packed *store);
-
 /*
  * Prepare the packfile store by loading packfiles and multi-pack indices for
  * all alternates. This becomes a no-op if the store is already prepared.

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH 04/16] odb/source-packed: start converting to a proper `struct odb_source`
From: Patrick Steinhardt @ 2026-06-04 11:25 UTC (permalink / raw)
  To: git
In-Reply-To: <20260604-pks-odb-source-packed-v1-0-2e7ab31b4b5c@pks.im>

Start converting `struct odb_source_packed` into a proper pluggable
`struct odb_source` by embedding the base struct and assigning it the
new `ODB_SOURCE_PACKED` type. Furthermore, wire up lifecycle management
of this source by implementing the `free` callback and taking ownership
of the chdir notifications.

Note that the packed source is not yet functional as a standalone `struct
odb_source`, as it's missing all of the callback implementations. These
will be wired up in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-files.c  |  2 +-
 odb/source-packed.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 odb/source-packed.h | 12 ++++++++++++
 odb/source.h        |  3 +++
 packfile.c          | 10 ----------
 packfile.h          |  6 ------
 6 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/odb/source-files.c b/odb/source-files.c
index e04525fb08..3608808e7c 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -29,7 +29,7 @@ static void odb_source_files_free(struct odb_source *source)
 	struct odb_source_files *files = odb_source_files_downcast(source);
 	chdir_notify_unregister(NULL, odb_source_files_reparent, files);
 	odb_source_free(&files->loose->base);
-	packfile_store_free(files->packed);
+	odb_source_free(&files->packed->base);
 	odb_source_release(&files->base);
 	free(files);
 }
diff --git a/odb/source-packed.c b/odb/source-packed.c
index 12e785be48..f81a990cbd 100644
--- a/odb/source-packed.c
+++ b/odb/source-packed.c
@@ -1,11 +1,50 @@
 #include "git-compat-util.h"
+#include "abspath.h"
+#include "chdir-notify.h"
 #include "odb/source-packed.h"
+#include "packfile.h"
+
+static void odb_source_packed_reparent(const char *name UNUSED,
+				       const char *old_cwd,
+				       const char *new_cwd,
+				       void *cb_data)
+{
+	struct odb_source_packed *packed = cb_data;
+	char *path = reparent_relative_path(old_cwd, new_cwd,
+					    packed->base.path);
+	free(packed->base.path);
+	packed->base.path = path;
+}
+
+static void odb_source_packed_free(struct odb_source *source)
+{
+	struct odb_source_packed *packed = odb_source_packed_downcast(source);
+
+	chdir_notify_unregister(NULL, odb_source_packed_reparent, packed);
+
+	for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next)
+		free(e->pack);
+	packfile_list_clear(&packed->packs);
+
+	strmap_clear(&packed->packs_by_path, 0);
+	odb_source_release(&packed->base);
+	free(packed);
+}
 
 struct odb_source_packed *odb_source_packed_new(struct odb_source_files *parent)
 {
-	struct odb_source_packed *store;
-	CALLOC_ARRAY(store, 1);
-	store->files = parent;
-	strmap_init(&store->packs_by_path);
-	return store;
+	struct odb_source_packed *packed;
+
+	CALLOC_ARRAY(packed, 1);
+	odb_source_init(&packed->base, parent->base.odb, ODB_SOURCE_PACKED,
+			parent->base.path, parent->base.local);
+	packed->files = parent;
+	strmap_init(&packed->packs_by_path);
+
+	packed->base.free = odb_source_packed_free;
+
+	if (!is_absolute_path(parent->base.path))
+		chdir_notify_register(NULL, odb_source_packed_reparent, packed);
+
+	return packed;
 }
diff --git a/odb/source-packed.h b/odb/source-packed.h
index 6e5c79a77a..abe3310069 100644
--- a/odb/source-packed.h
+++ b/odb/source-packed.h
@@ -17,6 +17,7 @@ struct packfile_list_entry {
  * A store that manages packfiles for a given object database.
  */
 struct odb_source_packed {
+	struct odb_source base;
 	struct odb_source_files *files;
 
 	/*
@@ -77,4 +78,15 @@ struct odb_source_packed {
  */
 struct odb_source_packed *odb_source_packed_new(struct odb_source_files *parent);
 
+/*
+ * Cast the given object database source to the packed backend. This will cause
+ * a BUG in case the source doesn't use this backend.
+ */
+static inline struct odb_source_packed *odb_source_packed_downcast(struct odb_source *source)
+{
+	if (source->type != ODB_SOURCE_PACKED)
+		BUG("trying to downcast source of type '%d' to packed", source->type);
+	return container_of(source, struct odb_source_packed, base);
+}
+
 #endif
diff --git a/odb/source.h b/odb/source.h
index 8bcb67787e..6865e1f71a 100644
--- a/odb/source.h
+++ b/odb/source.h
@@ -17,6 +17,9 @@ enum odb_source_type {
 	/* The "loose" backend that uses loose objects, only. */
 	ODB_SOURCE_LOOSE,
 
+	/* The "packed" backend that uses packfiles. */
+	ODB_SOURCE_PACKED,
+
 	/* The "in-memory" backend that stores objects in memory. */
 	ODB_SOURCE_INMEMORY,
 };
diff --git a/packfile.c b/packfile.c
index 2af07f5e1a..8fefc4358e 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2832,16 +2832,6 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l
 	return 0;
 }
 
-void packfile_store_free(struct odb_source_packed *store)
-{
-	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
-		free(e->pack);
-	packfile_list_clear(&store->packs);
-
-	strmap_clear(&store->packs_by_path, 0);
-	free(store);
-}
-
 void packfile_store_close(struct odb_source_packed *store)
 {
 	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) {
diff --git a/packfile.h b/packfile.h
index 6f76df3589..d6814b564e 100644
--- a/packfile.h
+++ b/packfile.h
@@ -67,12 +67,6 @@ void packfile_list_append(struct packfile_list *list, struct packed_git *pack);
 struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs,
 					  const struct object_id *oid);
 
-/*
- * Free the packfile store and all its associated state. All packfiles
- * tracked by the store will be closed.
- */
-void packfile_store_free(struct odb_source_packed *store);
-
 /*
  * Close all packfiles associated with this store. The packfiles won't be
  * free'd, so they can be re-opened at a later point in time.

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH 03/16] odb/source-packed: store pointer to "files" instead of generic source
From: Patrick Steinhardt @ 2026-06-04 11:25 UTC (permalink / raw)
  To: git
In-Reply-To: <20260604-pks-odb-source-packed-v1-0-2e7ab31b4b5c@pks.im>

The `struct odb_source_packed` holds a pointer to its owning parent
source. The way that Git is currently structured, this parent is always
the "files" source. In subsequent commits we're going to detangle that
so that the "packed" source doesn't have any owning parent source at
all, which makes it usable as a completely standalone source.

Detangling this mess is somewhat intricate though, and is made even more
intricate because it's not always clear which kind of source one is
holding at a specific point in time -- either the parent "files" source,
or the child "packed" source.

Make this relationship more explicit by storing a pointer to the "files"
source instead of storing a pointer to a generic `struct odb_source`.
This will help make subsequent steps a bit clearer.

Note that this is only a temporary step. At the end of this series
we will have dropped the parent pointer completely.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-files.c  |  2 +-
 odb/source-packed.c |  4 ++--
 odb/source-packed.h |  4 ++--
 packfile.c          | 12 ++++++------
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/odb/source-files.c b/odb/source-files.c
index 191562f316..e04525fb08 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -269,7 +269,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
 	CALLOC_ARRAY(files, 1);
 	odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local);
 	files->loose = odb_source_loose_new(odb, path, local);
-	files->packed = odb_source_packed_new(&files->base);
+	files->packed = odb_source_packed_new(files);
 
 	files->base.free = odb_source_files_free;
 	files->base.close = odb_source_files_close;
diff --git a/odb/source-packed.c b/odb/source-packed.c
index 1e94b47ea0..12e785be48 100644
--- a/odb/source-packed.c
+++ b/odb/source-packed.c
@@ -1,11 +1,11 @@
 #include "git-compat-util.h"
 #include "odb/source-packed.h"
 
-struct odb_source_packed *odb_source_packed_new(struct odb_source *source)
+struct odb_source_packed *odb_source_packed_new(struct odb_source_files *parent)
 {
 	struct odb_source_packed *store;
 	CALLOC_ARRAY(store, 1);
-	store->source = source;
+	store->files = parent;
 	strmap_init(&store->packs_by_path);
 	return store;
 }
diff --git a/odb/source-packed.h b/odb/source-packed.h
index c17068a4f1..6e5c79a77a 100644
--- a/odb/source-packed.h
+++ b/odb/source-packed.h
@@ -17,7 +17,7 @@ struct packfile_list_entry {
  * A store that manages packfiles for a given object database.
  */
 struct odb_source_packed {
-	struct odb_source *source;
+	struct odb_source_files *files;
 
 	/*
 	 * The list of packfiles in the order in which they have been most
@@ -75,6 +75,6 @@ struct odb_source_packed {
  * Allocate and initialize a new empty packfile store for the given object
  * database source.
  */
-struct odb_source_packed *odb_source_packed_new(struct odb_source *source);
+struct odb_source_packed *odb_source_packed_new(struct odb_source_files *parent);
 
 #endif
diff --git a/packfile.c b/packfile.c
index f2b27df66e..2af07f5e1a 100644
--- a/packfile.c
+++ b/packfile.c
@@ -885,7 +885,7 @@ struct packed_git *packfile_store_load_pack(struct odb_source_packed *store,
 
 	p = strmap_get(&store->packs_by_path, key.buf);
 	if (!p) {
-		p = add_packed_git(store->source->odb->repo, idx_path,
+		p = add_packed_git(store->files->base.odb->repo, idx_path,
 				   strlen(idx_path), local);
 		if (p)
 			packfile_store_add_pack(store, p);
@@ -1073,8 +1073,8 @@ void packfile_store_prepare(struct odb_source_packed *store)
 	if (store->initialized)
 		return;
 
-	prepare_multi_pack_index_one(store->source);
-	prepare_packed_git_one(store->source);
+	prepare_multi_pack_index_one(&store->files->base);
+	prepare_packed_git_one(&store->files->base);
 
 	sort_packs(&store->packs.head, sort_pack);
 	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
@@ -1112,7 +1112,7 @@ int packfile_store_count_objects(struct odb_source_packed *store,
 	unsigned long count = 0;
 	int ret;
 
-	m = get_multi_pack_index(store->source);
+	m = get_multi_pack_index(&store->files->base);
 	if (m)
 		count += m->num_objects + m->num_objects_in_base;
 
@@ -2533,7 +2533,7 @@ static int packfile_store_for_each_prefixed_object(
 
 	store->skip_mru_updates = true;
 
-	m = get_multi_pack_index(store->source);
+	m = get_multi_pack_index(&store->files->base);
 	if (m) {
 		ret = for_each_prefixed_object_in_midx(store, m, opts, data);
 		if (ret)
@@ -2715,7 +2715,7 @@ int packfile_store_find_abbrev_len(struct odb_source_packed *store,
 	struct packfile_list_entry *e;
 	struct multi_pack_index *m;
 
-	m = get_multi_pack_index(store->source);
+	m = get_multi_pack_index(&store->files->base);
 	if (m)
 		find_abbrev_len_for_midx(m, oid, min_len, &min_len);
 

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH 02/16] packfile: move packed source into "odb/" subsystem
From: Patrick Steinhardt @ 2026-06-04 11:25 UTC (permalink / raw)
  To: git
In-Reply-To: <20260604-pks-odb-source-packed-v1-0-2e7ab31b4b5c@pks.im>

In subsequent patches we'll be turning `struct odb_source_packed` into a
proper `struct odb_source`. As a first step towards this goal, move its
struct out of "packfile.{c,h}" and into "odb/source-packed.{c,h}".

This detaches the implementation of the packfile object source from the
generic packfile code, following the same convention already used by the
"files" and "in-memory" sources.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Makefile            |  1 +
 meson.build         |  1 +
 odb/source-files.c  |  2 +-
 odb/source-packed.c | 11 ++++++++
 odb/source-packed.h | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 packfile.c          |  9 ------
 packfile.h          | 75 +------------------------------------------------
 7 files changed, 95 insertions(+), 84 deletions(-)

diff --git a/Makefile b/Makefile
index 0976a69b4c..e3381b8857 100644
--- a/Makefile
+++ b/Makefile
@@ -1218,6 +1218,7 @@ LIB_OBJS += odb/source.o
 LIB_OBJS += odb/source-files.o
 LIB_OBJS += odb/source-inmemory.o
 LIB_OBJS += odb/source-loose.o
+LIB_OBJS += odb/source-packed.o
 LIB_OBJS += odb/streaming.o
 LIB_OBJS += odb/transaction.o
 LIB_OBJS += oid-array.o
diff --git a/meson.build b/meson.build
index 3247697f74..1976024008 100644
--- a/meson.build
+++ b/meson.build
@@ -406,6 +406,7 @@ libgit_sources = [
   'odb/source-files.c',
   'odb/source-inmemory.c',
   'odb/source-loose.c',
+  'odb/source-packed.c',
   'odb/streaming.c',
   'odb/transaction.c',
   'oid-array.c',
diff --git a/odb/source-files.c b/odb/source-files.c
index 5bdd042922..191562f316 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -269,7 +269,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
 	CALLOC_ARRAY(files, 1);
 	odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local);
 	files->loose = odb_source_loose_new(odb, path, local);
-	files->packed = packfile_store_new(&files->base);
+	files->packed = odb_source_packed_new(&files->base);
 
 	files->base.free = odb_source_files_free;
 	files->base.close = odb_source_files_close;
diff --git a/odb/source-packed.c b/odb/source-packed.c
new file mode 100644
index 0000000000..1e94b47ea0
--- /dev/null
+++ b/odb/source-packed.c
@@ -0,0 +1,11 @@
+#include "git-compat-util.h"
+#include "odb/source-packed.h"
+
+struct odb_source_packed *odb_source_packed_new(struct odb_source *source)
+{
+	struct odb_source_packed *store;
+	CALLOC_ARRAY(store, 1);
+	store->source = source;
+	strmap_init(&store->packs_by_path);
+	return store;
+}
diff --git a/odb/source-packed.h b/odb/source-packed.h
new file mode 100644
index 0000000000..c17068a4f1
--- /dev/null
+++ b/odb/source-packed.h
@@ -0,0 +1,80 @@
+#ifndef ODB_SOURCE_PACKED_H
+#define ODB_SOURCE_PACKED_H
+
+#include "odb/source.h"
+#include "strmap.h"
+
+struct packfile_list {
+	struct packfile_list_entry *head, *tail;
+};
+
+struct packfile_list_entry {
+	struct packfile_list_entry *next;
+	struct packed_git *pack;
+};
+
+/*
+ * A store that manages packfiles for a given object database.
+ */
+struct odb_source_packed {
+	struct odb_source *source;
+
+	/*
+	 * The list of packfiles in the order in which they have been most
+	 * recently used.
+	 */
+	struct packfile_list packs;
+
+	/*
+	 * Cache of packfiles which are marked as "kept", either because there
+	 * is an on-disk ".keep" file or because they are marked as "kept" in
+	 * memory.
+	 *
+	 * Should not be accessed directly, but via
+	 * `packfile_store_get_kept_pack_cache()`. The list of packs gets
+	 * invalidated when the stored flags and the flags passed to
+	 * `packfile_store_get_kept_pack_cache()` mismatch.
+	 */
+	struct {
+		struct packed_git **packs;
+		unsigned flags;
+	} kept_cache;
+
+	/* The multi-pack index that belongs to this specific packfile store. */
+	struct multi_pack_index *midx;
+
+	/*
+	 * A map of packfile names to packed_git structs for tracking which
+	 * packs have been loaded already.
+	 */
+	struct strmap packs_by_path;
+
+	/*
+	 * Whether packfiles have already been populated with this store's
+	 * packs.
+	 */
+	bool initialized;
+
+	/*
+	 * Usually, packfiles will be reordered to the front of the `packs`
+	 * list whenever an object is looked up via them. This has the effect
+	 * that packs that contain a lot of accessed objects will be located
+	 * towards the front.
+	 *
+	 * This is usually desirable, but there are exceptions. One exception
+	 * is when looking up multiple objects in a loop for each packfile.
+	 * In that case, we may easily end up with an infinite loop as the
+	 * packfiles get reordered to the front repeatedly.
+	 *
+	 * Setting this field to `true` thus disables these reorderings.
+	 */
+	bool skip_mru_updates;
+};
+
+/*
+ * Allocate and initialize a new empty packfile store for the given object
+ * database source.
+ */
+struct odb_source_packed *odb_source_packed_new(struct odb_source *source);
+
+#endif
diff --git a/packfile.c b/packfile.c
index a2d768d0ae..f2b27df66e 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2832,15 +2832,6 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l
 	return 0;
 }
 
-struct odb_source_packed *packfile_store_new(struct odb_source *source)
-{
-	struct odb_source_packed *store;
-	CALLOC_ARRAY(store, 1);
-	store->source = source;
-	strmap_init(&store->packs_by_path);
-	return store;
-}
-
 void packfile_store_free(struct odb_source_packed *store)
 {
 	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
diff --git a/packfile.h b/packfile.h
index 9cec15bc50..6f76df3589 100644
--- a/packfile.h
+++ b/packfile.h
@@ -5,9 +5,9 @@
 #include "object.h"
 #include "odb.h"
 #include "odb/source-files.h"
+#include "odb/source-packed.h"
 #include "oidset.h"
 #include "repository.h"
-#include "strmap.h"
 
 /* in odb.h */
 struct object_info;
@@ -54,15 +54,6 @@ struct packed_git {
 	char pack_name[FLEX_ARRAY]; /* more */
 };
 
-struct packfile_list {
-	struct packfile_list_entry *head, *tail;
-};
-
-struct packfile_list_entry {
-	struct packfile_list_entry *next;
-	struct packed_git *pack;
-};
-
 void packfile_list_clear(struct packfile_list *list);
 void packfile_list_remove(struct packfile_list *list, struct packed_git *pack);
 void packfile_list_prepend(struct packfile_list *list, struct packed_git *pack);
@@ -76,70 +67,6 @@ void packfile_list_append(struct packfile_list *list, struct packed_git *pack);
 struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs,
 					  const struct object_id *oid);
 
-/*
- * A store that manages packfiles for a given object database.
- */
-struct odb_source_packed {
-	struct odb_source *source;
-
-	/*
-	 * The list of packfiles in the order in which they have been most
-	 * recently used.
-	 */
-	struct packfile_list packs;
-
-	/*
-	 * Cache of packfiles which are marked as "kept", either because there
-	 * is an on-disk ".keep" file or because they are marked as "kept" in
-	 * memory.
-	 *
-	 * Should not be accessed directly, but via
-	 * `packfile_store_get_kept_pack_cache()`. The list of packs gets
-	 * invalidated when the stored flags and the flags passed to
-	 * `packfile_store_get_kept_pack_cache()` mismatch.
-	 */
-	struct {
-		struct packed_git **packs;
-		unsigned flags;
-	} kept_cache;
-
-	/* The multi-pack index that belongs to this specific packfile store. */
-	struct multi_pack_index *midx;
-
-	/*
-	 * A map of packfile names to packed_git structs for tracking which
-	 * packs have been loaded already.
-	 */
-	struct strmap packs_by_path;
-
-	/*
-	 * Whether packfiles have already been populated with this store's
-	 * packs.
-	 */
-	bool initialized;
-
-	/*
-	 * Usually, packfiles will be reordered to the front of the `packs`
-	 * list whenever an object is looked up via them. This has the effect
-	 * that packs that contain a lot of accessed objects will be located
-	 * towards the front.
-	 *
-	 * This is usually desireable, but there are exceptions. One exception
-	 * is when the looking up multiple objects in a loop for each packfile.
-	 * In that case, we may easily end up with an infinite loop as the
-	 * packfiles get reordered to the front repeatedly.
-	 *
-	 * Setting this field to `true` thus disables these reorderings.
-	 */
-	bool skip_mru_updates;
-};
-
-/*
- * Allocate and initialize a new empty packfile store for the given object
- * database source.
- */
-struct odb_source_packed *packfile_store_new(struct odb_source *source);
-
 /*
  * Free the packfile store and all its associated state. All packfiles
  * tracked by the store will be closed.

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH 01/16] packfile: rename `struct packfile_store` to `odb_source_packed`
From: Patrick Steinhardt @ 2026-06-04 11:25 UTC (permalink / raw)
  To: git
In-Reply-To: <20260604-pks-odb-source-packed-v1-0-2e7ab31b4b5c@pks.im>

Not too long ago, we introduced the packfile store in b7983adb51
(packfile: introduce a new `struct packfile_store`, 2025-09-23). This
struct is responsible for managing all of our access to packfiles and is
used as one of the two sources of objects for the "files" source.

Back when I introduced this structure I didn't yet have a clear vision
that it would eventually turn into a proper object database source, or
of what exactly that infrastructure would look like. Now though it's
becoming increasingly clear that it does make sense to treat it just the
same as any of our other ODB sources.

The consequence is that the naming is now a bit out-of-date: it's just
another source and will be turned into a proper `struct odb_source` over
the next couple of commits, but it's not named accordingly.

Rename the structure to `odb_source_packed` to align it with this goal
and to bring it in line with the other sources we already have.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-files.h |  4 ++--
 packfile.c         | 56 +++++++++++++++++++++++++++---------------------------
 packfile.h         | 32 +++++++++++++++----------------
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/odb/source-files.h b/odb/source-files.h
index 23a3b4e04b..d7ac3c1c81 100644
--- a/odb/source-files.h
+++ b/odb/source-files.h
@@ -4,7 +4,7 @@
 #include "odb/source.h"
 
 struct odb_source_loose;
-struct packfile_store;
+struct odb_source_packed;
 
 /*
  * The files object database source uses a combination of loose objects and
@@ -13,7 +13,7 @@ struct packfile_store;
 struct odb_source_files {
 	struct odb_source base;
 	struct odb_source_loose *loose;
-	struct packfile_store *packed;
+	struct odb_source_packed *packed;
 };
 
 /* Allocate and initialize a new object source. */
diff --git a/packfile.c b/packfile.c
index 89366abfe3..a2d768d0ae 100644
--- a/packfile.c
+++ b/packfile.c
@@ -859,7 +859,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
 	return p;
 }
 
-void packfile_store_add_pack(struct packfile_store *store,
+void packfile_store_add_pack(struct odb_source_packed *store,
 			     struct packed_git *pack)
 {
 	if (pack->pack_fd != -1)
@@ -869,7 +869,7 @@ void packfile_store_add_pack(struct packfile_store *store,
 	strmap_put(&store->packs_by_path, pack->pack_name, pack);
 }
 
-struct packed_git *packfile_store_load_pack(struct packfile_store *store,
+struct packed_git *packfile_store_load_pack(struct odb_source_packed *store,
 					    const char *idx_path, int local)
 {
 	struct strbuf key = STRBUF_INIT;
@@ -1068,7 +1068,7 @@ static int sort_pack(const struct packfile_list_entry *a,
 	return -1;
 }
 
-void packfile_store_prepare(struct packfile_store *store)
+void packfile_store_prepare(struct odb_source_packed *store)
 {
 	if (store->initialized)
 		return;
@@ -1084,13 +1084,13 @@ void packfile_store_prepare(struct packfile_store *store)
 	store->initialized = true;
 }
 
-void packfile_store_reprepare(struct packfile_store *store)
+void packfile_store_reprepare(struct odb_source_packed *store)
 {
 	store->initialized = false;
 	packfile_store_prepare(store);
 }
 
-struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store)
+struct packfile_list_entry *packfile_store_get_packs(struct odb_source_packed *store)
 {
 	packfile_store_prepare(store);
 
@@ -1103,7 +1103,7 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor
 	return store->packs.head;
 }
 
-int packfile_store_count_objects(struct packfile_store *store,
+int packfile_store_count_objects(struct odb_source_packed *store,
 				 enum odb_count_objects_flags flags UNUSED,
 				 unsigned long *out)
 {
@@ -2160,7 +2160,7 @@ static int fill_pack_entry(const struct object_id *oid,
 	return 1;
 }
 
-static int find_pack_entry(struct packfile_store *store,
+static int find_pack_entry(struct odb_source_packed *store,
 			   const struct object_id *oid,
 			   struct pack_entry *e)
 {
@@ -2183,7 +2183,7 @@ static int find_pack_entry(struct packfile_store *store,
 	return 0;
 }
 
-int packfile_store_freshen_object(struct packfile_store *store,
+int packfile_store_freshen_object(struct odb_source_packed *store,
 				  const struct object_id *oid)
 {
 	struct pack_entry e;
@@ -2199,7 +2199,7 @@ int packfile_store_freshen_object(struct packfile_store *store,
 	return 1;
 }
 
-int packfile_store_read_object_info(struct packfile_store *store,
+int packfile_store_read_object_info(struct odb_source_packed *store,
 				    const struct object_id *oid,
 				    struct object_info *oi,
 				    enum object_info_flags flags)
@@ -2234,7 +2234,7 @@ int packfile_store_read_object_info(struct packfile_store *store,
 	return 0;
 }
 
-static void maybe_invalidate_kept_pack_cache(struct packfile_store *store,
+static void maybe_invalidate_kept_pack_cache(struct odb_source_packed *store,
 					     unsigned flags)
 {
 	if (!store->kept_cache.packs)
@@ -2245,7 +2245,7 @@ static void maybe_invalidate_kept_pack_cache(struct packfile_store *store,
 	store->kept_cache.flags = 0;
 }
 
-struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store,
+struct packed_git **packfile_store_get_kept_pack_cache(struct odb_source_packed *store,
 						       unsigned flags)
 {
 	maybe_invalidate_kept_pack_cache(store, flags);
@@ -2365,8 +2365,8 @@ int for_each_object_in_pack(struct packed_git *p,
 	return r;
 }
 
-struct packfile_store_for_each_object_wrapper_data {
-	struct packfile_store *store;
+struct odb_source_packed_for_each_object_wrapper_data {
+	struct odb_source_packed *store;
 	const struct object_info *request;
 	odb_for_each_object_cb cb;
 	void *cb_data;
@@ -2377,7 +2377,7 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
 						  uint32_t index_pos,
 						  void *cb_data)
 {
-	struct packfile_store_for_each_object_wrapper_data *data = cb_data;
+	struct odb_source_packed_for_each_object_wrapper_data *data = cb_data;
 
 	if (data->request) {
 		off_t offset = nth_packed_object_offset(pack, index_pos);
@@ -2411,10 +2411,10 @@ static int match_hash(unsigned len, const unsigned char *a, const unsigned char
 }
 
 static int for_each_prefixed_object_in_midx(
-	struct packfile_store *store,
+	struct odb_source_packed *store,
 	struct multi_pack_index *m,
 	const struct odb_for_each_object_options *opts,
-	struct packfile_store_for_each_object_wrapper_data *data)
+	struct odb_source_packed_for_each_object_wrapper_data *data)
 {
 	int ret;
 
@@ -2470,10 +2470,10 @@ static int for_each_prefixed_object_in_midx(
 }
 
 static int for_each_prefixed_object_in_pack(
-	struct packfile_store *store,
+	struct odb_source_packed *store,
 	struct packed_git *p,
 	const struct odb_for_each_object_options *opts,
-	struct packfile_store_for_each_object_wrapper_data *data)
+	struct odb_source_packed_for_each_object_wrapper_data *data)
 {
 	uint32_t num, i, first = 0;
 	int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ?
@@ -2519,9 +2519,9 @@ static int for_each_prefixed_object_in_pack(
 }
 
 static int packfile_store_for_each_prefixed_object(
-	struct packfile_store *store,
+	struct odb_source_packed *store,
 	const struct odb_for_each_object_options *opts,
-	struct packfile_store_for_each_object_wrapper_data *data)
+	struct odb_source_packed_for_each_object_wrapper_data *data)
 {
 	struct packfile_list_entry *e;
 	struct multi_pack_index *m;
@@ -2566,13 +2566,13 @@ static int packfile_store_for_each_prefixed_object(
 	return ret;
 }
 
-int packfile_store_for_each_object(struct packfile_store *store,
+int packfile_store_for_each_object(struct odb_source_packed *store,
 				   const struct object_info *request,
 				   odb_for_each_object_cb cb,
 				   void *cb_data,
 				   const struct odb_for_each_object_options *opts)
 {
-	struct packfile_store_for_each_object_wrapper_data data = {
+	struct odb_source_packed_for_each_object_wrapper_data data = {
 		.store = store,
 		.request = request,
 		.cb = cb,
@@ -2707,7 +2707,7 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
 	*out = len;
 }
 
-int packfile_store_find_abbrev_len(struct packfile_store *store,
+int packfile_store_find_abbrev_len(struct odb_source_packed *store,
 				   const struct object_id *oid,
 				   unsigned min_len,
 				   unsigned *out)
@@ -2832,16 +2832,16 @@ int parse_pack_header_option(const char *in, unsigned char *out, unsigned int *l
 	return 0;
 }
 
-struct packfile_store *packfile_store_new(struct odb_source *source)
+struct odb_source_packed *packfile_store_new(struct odb_source *source)
 {
-	struct packfile_store *store;
+	struct odb_source_packed *store;
 	CALLOC_ARRAY(store, 1);
 	store->source = source;
 	strmap_init(&store->packs_by_path);
 	return store;
 }
 
-void packfile_store_free(struct packfile_store *store)
+void packfile_store_free(struct odb_source_packed *store)
 {
 	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
 		free(e->pack);
@@ -2851,7 +2851,7 @@ void packfile_store_free(struct packfile_store *store)
 	free(store);
 }
 
-void packfile_store_close(struct packfile_store *store)
+void packfile_store_close(struct odb_source_packed *store)
 {
 	for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) {
 		if (e->pack->do_not_close)
@@ -2988,7 +2988,7 @@ int packfile_read_object_stream(struct odb_read_stream **out,
 }
 
 int packfile_store_read_object_stream(struct odb_read_stream **out,
-				      struct packfile_store *store,
+				      struct odb_source_packed *store,
 				      const struct object_id *oid)
 {
 	struct pack_entry e;
diff --git a/packfile.h b/packfile.h
index 49d6bdecf6..9cec15bc50 100644
--- a/packfile.h
+++ b/packfile.h
@@ -79,7 +79,7 @@ struct packed_git *packfile_list_find_oid(struct packfile_list_entry *packs,
 /*
  * A store that manages packfiles for a given object database.
  */
-struct packfile_store {
+struct odb_source_packed {
 	struct odb_source *source;
 
 	/*
@@ -138,19 +138,19 @@ struct packfile_store {
  * Allocate and initialize a new empty packfile store for the given object
  * database source.
  */
-struct packfile_store *packfile_store_new(struct odb_source *source);
+struct odb_source_packed *packfile_store_new(struct odb_source *source);
 
 /*
  * Free the packfile store and all its associated state. All packfiles
  * tracked by the store will be closed.
  */
-void packfile_store_free(struct packfile_store *store);
+void packfile_store_free(struct odb_source_packed *store);
 
 /*
  * Close all packfiles associated with this store. The packfiles won't be
  * free'd, so they can be re-opened at a later point in time.
  */
-void packfile_store_close(struct packfile_store *store);
+void packfile_store_close(struct odb_source_packed *store);
 
 /*
  * Prepare the packfile store by loading packfiles and multi-pack indices for
@@ -159,7 +159,7 @@ void packfile_store_close(struct packfile_store *store);
  * It shouldn't typically be necessary to call this function directly, as
  * functions that access the store know to prepare it.
  */
-void packfile_store_prepare(struct packfile_store *store);
+void packfile_store_prepare(struct odb_source_packed *store);
 
 /*
  * Clear the packfile caches and try to look up any new packfiles that have
@@ -167,20 +167,20 @@ void packfile_store_prepare(struct packfile_store *store);
  *
  * This function must be called under the `odb_read_lock()`.
  */
-void packfile_store_reprepare(struct packfile_store *store);
+void packfile_store_reprepare(struct odb_source_packed *store);
 
 /*
  * Add the pack to the store so that contained objects become accessible via
  * the store. This moves ownership into the store.
  */
-void packfile_store_add_pack(struct packfile_store *store,
+void packfile_store_add_pack(struct odb_source_packed *store,
 			     struct packed_git *pack);
 
 /*
  * Get all packs managed by the given store, including packfiles that are
  * referenced by multi-pack indices.
  */
-struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
+struct packfile_list_entry *packfile_store_get_packs(struct odb_source_packed *store);
 
 struct repo_for_each_pack_data {
 	struct odb_source *source;
@@ -239,7 +239,7 @@ static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data *
 	     repo_for_each_pack_data_next(&eack_pack_data))
 
 int packfile_store_read_object_stream(struct odb_read_stream **out,
-				      struct packfile_store *store,
+				      struct odb_source_packed *store,
 				      const struct object_id *oid);
 
 /*
@@ -248,7 +248,7 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
  * not found, 0 if it was and read successfully, and a negative error code in
  * case the object was corrupted.
  */
-int packfile_store_read_object_info(struct packfile_store *store,
+int packfile_store_read_object_info(struct odb_source_packed *store,
 				    const struct object_id *oid,
 				    struct object_info *oi,
 				    enum object_info_flags flags);
@@ -258,10 +258,10 @@ int packfile_store_read_object_info(struct packfile_store *store,
  * either the newly opened packfile or the preexisting packfile. Returns a
  * `NULL` pointer in case the packfile could not be opened.
  */
-struct packed_git *packfile_store_load_pack(struct packfile_store *store,
+struct packed_git *packfile_store_load_pack(struct odb_source_packed *store,
 					    const char *idx_path, int local);
 
-int packfile_store_freshen_object(struct packfile_store *store,
+int packfile_store_freshen_object(struct odb_source_packed *store,
 				  const struct object_id *oid);
 
 enum kept_pack_type {
@@ -276,7 +276,7 @@ enum kept_pack_type {
  *
  * Return 0 on success, a negative error code otherwise.
  */
-int packfile_store_count_objects(struct packfile_store *store,
+int packfile_store_count_objects(struct odb_source_packed *store,
 				 enum odb_count_objects_flags flags,
 				 unsigned long *out);
 
@@ -285,7 +285,7 @@ int packfile_store_count_objects(struct packfile_store *store,
  * combination of `kept_pack_type` flags. The cache is computed on demand and
  * will be recomputed whenever the flags change.
  */
-struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *store,
+struct packed_git **packfile_store_get_kept_pack_cache(struct odb_source_packed *store,
 						       unsigned flags);
 
 struct pack_window {
@@ -365,13 +365,13 @@ int for_each_object_in_pack(struct packed_git *p,
  *
  * The flags parameter is a combination of `odb_for_each_object_flags`.
  */
-int packfile_store_for_each_object(struct packfile_store *store,
+int packfile_store_for_each_object(struct odb_source_packed *store,
 				   const struct object_info *request,
 				   odb_for_each_object_cb cb,
 				   void *cb_data,
 				   const struct odb_for_each_object_options *opts);
 
-int packfile_store_find_abbrev_len(struct packfile_store *store,
+int packfile_store_find_abbrev_len(struct odb_source_packed *store,
 				   const struct object_id *oid,
 				   unsigned min_len,
 				   unsigned *out);

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH 00/16] odb: make packed object source a proper `struct odb_source`
From: Patrick Steinhardt @ 2026-06-04 11:25 UTC (permalink / raw)
  To: git

Hi,

this patch series converts the "packed" source into a proper `struct
odb_source`. It's thus the equivalent to [1], which did the same thing
for the "loose" source.

This series here is unfortunately a bit bigger, mostly because I'm also
renaming `struct packfile_store` to `struct odb_source_packed`. Back
when I introduced the packfile store I didn't yet have the full vision
of what the final layout would look like, so I didn't have the foresight
yet to call it `struct odb_source_packed`. But now that the layout has
materialized I think it's sensible to adjust its naming to match all the
other sources that we have.

Also: I don't have anything else in the pipeline anymore that moves
around large pieces of our code in the vicinity of the object database.
So once this series has been merged, subsequent changes should be of a more
incremental nature.

This series is built on top of 9ac3f193c0 (The 11th batch, 2026-06-02)
with ps/odb-source-loose at ef4778bcba (odb/source-loose: drop pointer
to the "files" source, 2026-06-01) merged into it.

Note that there's a trivial conflict with 8570d9ba31 (Merge branch
'wy/docs-typofixes' into seen, 2026-06-04) that can be solved like this:

diff --cc packfile.h
index ed49ab7f21,5729a37018..0000000000
--- a/packfile.h
+++ b/packfile.h
diff --git a/odb/source-packed.h b/odb/source-packed.h
index ef5a10b224..1d312f7dea 100644
--- a/odb/source-packed.h
+++ b/odb/source-packed.h
@@ -61,7 +61,7 @@ struct odb_source_packed {
         * that packs that contain a lot of accessed objects will be located
         * towards the front.
         *
-        * This is usually desireable, but there are exceptions. One exception
+        * This is usually desirable, but there are exceptions. One exception
         * is when the looking up multiple objects in a loop for each packfile.
         * In that case, we may easily end up with an infinite loop as the
         * packfiles get reordered to the front repeatedly.

Thanks!

Patrick

[1]: <20260521-b4-pks-odb-source-loose-v1-0-6553b399be2d@pks.im>

---
Patrick Steinhardt (16):
      packfile: rename `struct packfile_store` to `odb_source_packed`
      packfile: move packed source into "odb/" subsystem
      odb/source-packed: store pointer to "files" instead of generic source
      odb/source-packed: start converting to a proper `struct odb_source`
      odb/source-packed: wire up `close()` callback
      odb/source-packed: wire up `reprepare()` callback
      packfile: use higher-level interface to implement `has_object_pack()`
      odb/source-packed: wire up `read_object_info()` callback
      odb/source-packed: wire up `read_object_stream()` callback
      odb/source-packed: wire up `for_each_object()` callback
      odb/source-packed: wire up `count_objects()` callback
      odb/source-packed: wire up `find_abbrev_len()` callback
      odb/source-packed: wire up `freshen_object()` callback
      odb/source-packed: stub out remaining functions
      midx: refactor interfaces to work on "packed" source
      odb/source-packed: drop pointer to "files" parent source

 Makefile                   |   1 +
 builtin/cat-file.c         |   4 +-
 builtin/grep.c             |   2 +-
 builtin/multi-pack-index.c |  29 +-
 builtin/pack-objects.c     |   7 +-
 builtin/repack.c           |   8 +-
 commit-graph.c             |   4 +-
 meson.build                |   1 +
 midx-write.c               |  34 +-
 midx.c                     | 118 +++----
 midx.h                     |  30 +-
 odb/source-files.c         |  20 +-
 odb/source-files.h         |   4 +-
 odb/source-packed.c        | 764 +++++++++++++++++++++++++++++++++++++++++++++
 odb/source-packed.h        | 102 ++++++
 odb/source.h               |   3 +
 pack-bitmap.c              |   8 +-
 pack-revindex.c            |   6 +-
 packfile.c                 | 701 +----------------------------------------
 packfile.h                 | 166 +---------
 repack-geometry.c          |   3 +-
 repack-midx.c              |   9 +-
 repack.c                   |   6 +-
 t/helper/test-read-midx.c  |   7 +-
 24 files changed, 1054 insertions(+), 983 deletions(-)


---
base-commit: 06d49cec508464ced5d42541890ce5d749542a61
change-id: 20260602-pks-odb-source-packed-3826c352f059


^ permalink raw reply related

* Is it intended behaviour that 'git gc' ignores the 'commitGraph.changedPaths' setting?
From: Tomasz Konojacki @ 2026-06-04 11:24 UTC (permalink / raw)
  To: git

Hi,

It seems that 'git gc' (and also 'fetch' with 'fetch.writeCommitGraph'
enabled) ignores the 'commitGraph.changedPaths' setting.

See the output below, the commands are being executed on a repo that
doesn't have a commit-graph generated:

$ git --version
git version 2.54.0

$ git config --global --get-all commitGraph.changedPaths
true

$ git gc
Enumerating objects: 674076, done.
Counting objects: 100% (674076/674076), done.
Delta compression using up to 16 threads
Compressing objects: 100% (137084/137084), done.
Writing objects: 100% (674076/674076), done.
Total 674076 (delta 524292), reused 673941 (delta 524158), pack-reused 0 (from 0)
Enumerating cruft objects: 6160, done.
Traversing cruft objects: 12279, done.
Counting objects: 100% (6160/6160), done.
Delta compression using up to 16 threads
Compressing objects: 100% (1802/1802), done.
Writing objects: 100% (6160/6160), done.
Total 6160 (delta 4314), reused 6160 (delta 4314), pack-reused 0 (from 0)
Expanding reachable commits in commit graph: 131458, done.

$ git commit-graph write
Expanding reachable commits in commit graph: 132865, done.
Computing commit changed paths Bloom filters: 100% (132865/132865), done.

As you can see, 'gc' didn't create changed paths bloom filters, only a
direct call to 'commit-graph write' did.

Is this intended behaviour? It's very surprising to me.

Also, is there a way to make 'gc' and 'fetch' generate changed path
bloom filters?

Thanks,
Tomasz

^ permalink raw reply

* [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects code paths, in the interest of keeping the
patches somewhat reasonably-sized, it left the public ODB API still
typed in `unsigned long`. In particular `struct object_info::sizep` and
the four wrappers built on top of it (`odb_read_object`,
`odb_read_object_peeled`, `odb_read_object_info`, `odb_pretend_object`)
still return the unpacked size through `unsigned long *`, so on Windows
`cat-file -s` and the `git add` / `git status` paths for a >4 GiB blob
silently cap at 4 GiB.

Widen the field and the four wrappers. The previous commits already
widened the `unpack_entry()` cascade and pack-objects' in-core size
accessors, so most of the cascade arrives here with no further work: the
temporary shims in `packed_object_info_with_index_pos()` and in
`unpack_entry()`'s delta-base recovery path go away, the two
`SET_SIZE(entry, cast_size_t_to_ulong(canonical_size))` calls in
`check_object()` and the matching one in `drop_reused_delta()` collapse
to plain `SET_SIZE`, and `oe_get_size_slow()`'s tail
`cast_size_t_to_ulong()` is gone too.

What remains narrow are the boundaries this series does not
intend to touch: the diff, blame, textconv and fast-import machinery.

Even so, this patch is unfortunately quite large.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 apply.c                       |  6 +++---
 archive.c                     |  4 ++--
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 ++++++++++----
 builtin/cat-file.c            | 39 +++++++++++++++++++++--------------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 +++++--
 builtin/fast-import.c         | 22 ++++++++++++++------
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++++------
 builtin/index-pack.c          |  6 +++---
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 ++--
 builtin/merge-tree.c          |  6 +++---
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 +++---
 builtin/pack-objects.c        | 33 ++++++++++++++++++++---------
 builtin/repo.c                |  4 +++-
 builtin/tag.c                 |  4 ++--
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  6 ++++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 +++-
 commit.c                      | 10 ++++-----
 config.c                      |  2 +-
 diff.c                        |  5 ++++-
 dir.c                         |  2 +-
 entry.c                       |  4 +---
 fmt-merge-msg.c               |  4 ++--
 fsck.c                        |  2 +-
 grep.c                        |  4 +++-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 ++--
 merge-blobs.c                 |  6 +++---
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 ++++---
 object-file.c                 | 18 +++++-----------
 object.c                      |  2 +-
 odb.c                         | 12 +++++------
 odb.h                         | 10 ++++-----
 odb/streaming.c               | 13 +-----------
 pack-bitmap.c                 |  4 ++--
 packfile.c                    | 12 +++--------
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 +++--
 read-cache.c                  |  6 +++---
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-pack-deltas.c   |  3 ++-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 ++--
 tree-walk.c                   | 10 +++++----
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 64 files changed, 205 insertions(+), 173 deletions(-)

diff --git a/apply.c b/apply.c
index 3cf544e9a9..5e54453f79 100644
--- a/apply.c
+++ b/apply.c
@@ -3321,7 +3321,7 @@ static int apply_binary(struct apply_state *state,
 	if (odb_has_object(the_repository->objects, &oid, 0)) {
 		/* We already have the postimage */
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *result;
 
 		result = odb_read_object(the_repository->objects, &oid,
@@ -3384,7 +3384,7 @@ static int read_blob_object(struct strbuf *buf, const struct object_id *oid, uns
 		strbuf_addf(buf, "Subproject commit %s\n", oid_to_hex(oid));
 	} else {
 		enum object_type type;
-		unsigned long sz;
+		size_t sz;
 		char *result;
 
 		result = odb_read_object(the_repository->objects, oid,
@@ -3611,7 +3611,7 @@ static int load_preimage(struct apply_state *state,
 
 static int resolve_to(struct image *image, const struct object_id *result_id)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *data;
 
diff --git a/archive.c b/archive.c
index 51229107a5..59790be986 100644
--- a/archive.c
+++ b/archive.c
@@ -87,7 +87,7 @@ static void *object_file_to_archive(const struct archiver_args *args,
 				    const struct object_id *oid,
 				    unsigned int mode,
 				    enum object_type *type,
-				    unsigned long *sizep)
+				    size_t *sizep)
 {
 	void *buffer;
 	const struct commit *commit = args->convert ? args->commit : NULL;
@@ -158,7 +158,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	write_archive_entry_fn_t write_entry = c->write_entry;
 	int err;
 	const char *path_without_prefix;
-	unsigned long size;
+	size_t size;
 	void *buffer;
 	enum object_type type;
 
diff --git a/attr.c b/attr.c
index 75369547b3..c61472a4e6 100644
--- a/attr.c
+++ b/attr.c
@@ -768,7 +768,7 @@ static struct attr_stack *read_attr_from_blob(struct index_state *istate,
 					      const char *path, unsigned flags)
 {
 	struct object_id oid;
-	unsigned long sz;
+	size_t sz;
 	enum object_type type;
 	void *buf;
 	unsigned short mode;
diff --git a/bisect.c b/bisect.c
index 905a9afb05..4742a5fef4 100644
--- a/bisect.c
+++ b/bisect.c
@@ -154,7 +154,7 @@ static void show_list(const char *debug, int counted, int nr,
 		struct commit *commit = p->item;
 		unsigned commit_flags = commit->object.flags;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object(the_repository->objects,
 					    &commit->object.oid, &type,
 					    &size);
diff --git a/blame.c b/blame.c
index 977cbb7097..126e232416 100644
--- a/blame.c
+++ b/blame.c
@@ -1041,10 +1041,13 @@ static void fill_origin_blob(struct diff_options *opt,
 		    textconv_object(opt->repo, o->path, o->mode,
 				    &o->blob_oid, 1, &file->ptr, &file_size))
 			;
-		else
+		else {
+			size_t file_size_st = 0;
 			file->ptr = odb_read_object(the_repository->objects,
 						    &o->blob_oid, &type,
-						    &file_size);
+						    &file_size_st);
+			file_size = cast_size_t_to_ulong(file_size_st);
+		}
 		file->size = file_size;
 
 		if (!file->ptr)
@@ -2869,10 +2872,14 @@ void setup_scoreboard(struct blame_scoreboard *sb,
 		    textconv_object(sb->repo, sb->path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf,
 				    &sb->final_buf_size))
 			;
-		else
+		else {
+			size_t final_buf_size_st = 0;
 			sb->final_buf = odb_read_object(the_repository->objects,
 							&o->blob_oid, &type,
-							&sb->final_buf_size);
+							&final_buf_size_st);
+			sb->final_buf_size =
+				cast_size_t_to_ulong(final_buf_size_st);
+		}
 
 		if (!sb->final_buf)
 			die(_("cannot read blob %s for path %s"),
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index fa45f774d7..fa6e396ddc 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -84,7 +84,7 @@ static char *replace_idents_using_mailmap(char *object_buf, size_t *size)
 
 static int filter_object(const char *path, unsigned mode,
 			 const struct object_id *oid,
-			 char **buf, unsigned long *size)
+			 char **buf, size_t *size)
 {
 	enum object_type type;
 
@@ -120,7 +120,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	struct object_id oid;
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	struct object_context obj_context = {0};
 	struct object_info oi = OBJECT_INFO_INIT;
 	unsigned flags = OBJECT_INFO_LOOKUP_REPLACE;
@@ -166,7 +166,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) {
 			size_t s = size;
 			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 
 		printf("%"PRIuMAX"\n", (uintmax_t)size);
@@ -188,9 +188,15 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		break;
 
 	case 'c':
-		if (textconv_object(the_repository, path, obj_context.mode,
-				    &oid, 1, &buf, &size))
+	{
+		unsigned long size_ul = 0;
+		int textconv_ret = textconv_object(the_repository, path,
+						   obj_context.mode, &oid, 1,
+						   &buf, &size_ul);
+		size = size_ul;
+		if (textconv_ret)
 			break;
+	}
 		/* else fallthrough */
 
 	case 'p':
@@ -219,7 +225,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (use_mailmap) {
 			size_t s = size;
 			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 
 		/* otherwise just spit out the data */
@@ -266,7 +272,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (use_mailmap) {
 			size_t s = size;
 			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 		break;
 	}
@@ -288,7 +294,7 @@ cleanup:
 struct expand_data {
 	struct object_id oid;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	unsigned short mode;
 	off_t disk_size;
 	const char *rest;
@@ -405,7 +411,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 			fflush(stdout);
 		if (opt->transform_mode) {
 			char *contents;
-			unsigned long size;
+			size_t size;
 
 			if (!data->rest)
 				die("missing path for '%s'", oid_to_hex(oid));
@@ -417,9 +423,12 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 					    oid_to_hex(oid), data->rest);
 			} else if (opt->transform_mode == 'c') {
 				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
+				unsigned long size_ul = 0;
+				if (textconv_object(the_repository,
+						    data->rest, 0100644, oid,
+						    1, &contents, &size_ul))
+					size = size_ul;
+				else
 					contents = odb_read_object(the_repository->objects,
 								   oid, &type, &size);
 				if (!contents)
@@ -435,7 +444,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 	}
 	else {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		void *contents;
 
 		contents = odb_read_object(the_repository->objects, oid,
@@ -446,7 +455,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 		if (use_mailmap) {
 			size_t s = size;
 			contents = replace_idents_using_mailmap(contents, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 
 		if (type != data->type)
@@ -555,7 +564,7 @@ static void batch_object_write(const char *obj_name,
 			if (!buf)
 				die(_("unable to read %s"), oid_to_hex(&data->oid));
 			buf = replace_idents_using_mailmap(buf, &s);
-			data->size = cast_size_t_to_ulong(s);
+			data->size = s;
 
 			free(buf);
 		}
diff --git a/builtin/difftool.c b/builtin/difftool.c
index 2a21005f2e..26778f8515 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -319,7 +319,7 @@ static char *get_symlink(struct repository *repo,
 		data = strbuf_detach(&link, NULL);
 	} else {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		data = odb_read_object(repo->objects, oid, &type, &size);
 		if (!data)
 			die(_("could not read object %s for symlink %s"),
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 2eb43a28da..0be43104dc 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -317,7 +317,10 @@ static void export_blob(const struct object_id *oid)
 		object = (struct object *)lookup_blob(the_repository, oid);
 		eaten = 0;
 	} else {
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 		if (!buf)
 			die(_("could not read blob %s"), oid_to_hex(oid));
 		if (check_object_signature(the_repository, oid, buf, size,
@@ -880,7 +883,7 @@ static char *anonymize_tag(void)
 
 static void handle_tag(const char *name, struct tag *tag)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf;
 	const char *tagger, *tagger_end, *message;
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3dff898c43..d11a2cc2c1 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1291,7 +1291,10 @@ static void load_tree(struct tree_entry *root)
 			die(_("can't load tree %s"), oid_to_hex(oid));
 	} else {
 		enum object_type type;
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 		if (!buf || type != OBJ_TREE)
 			die(_("can't load tree %s"), oid_to_hex(oid));
 	}
@@ -2560,7 +2563,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 			die(_("mark :%" PRIuMAX " not a commit"), commit_mark);
 		oidcpy(&commit_oid, &commit_oe->idx.oid);
 	} else if (!repo_get_oid(the_repository, p, &commit_oid)) {
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object_peeled(the_repository->objects,
 						   &commit_oid, OBJ_COMMIT, &size,
 						   &commit_oid);
@@ -2627,10 +2630,12 @@ static void parse_from_existing(struct branch *b)
 		oidclr(&b->branch_tree.versions[1].oid, the_repository->hash_algo);
 	} else {
 		unsigned long size;
+		size_t size_st = 0;
 		char *buf;
 
 		buf = odb_read_object_peeled(the_repository->objects, &b->oid,
-					     OBJ_COMMIT, &size, &b->oid);
+					     OBJ_COMMIT, &size_st, &b->oid);
+		size = cast_size_t_to_ulong(size_st);
 		parse_from_commit(b, buf, size);
 		free(buf);
 	}
@@ -2722,7 +2727,7 @@ static struct hash_list *parse_merge(unsigned int *count)
 				die(_("mark :%" PRIuMAX " not a commit"), idnum);
 			oidcpy(&n->oid, &oe->idx.oid);
 		} else if (!repo_get_oid(the_repository, from, &n->oid)) {
-			unsigned long size;
+			size_t size;
 			char *buf = odb_read_object_peeled(the_repository->objects,
 							   &n->oid, OBJ_COMMIT,
 							   &size, &n->oid);
@@ -3330,7 +3335,10 @@ static void cat_blob(struct object_entry *oe, struct object_id *oid)
 	char *buf;
 
 	if (!oe || oe->pack_id == MAX_PACK_ID) {
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 	} else {
 		type = oe->type;
 		buf = gfi_unpack_entry(oe, &size);
@@ -3438,8 +3446,10 @@ static struct object_entry *dereference(struct object_entry *oe,
 		buf = gfi_unpack_entry(oe, &size);
 	} else {
 		enum object_type unused;
+		size_t size_st = 0;
 		buf = odb_read_object(the_repository->objects, oid,
-				      &unused, &size);
+				      &unused, &size_st);
+		size = cast_size_t_to_ulong(size_st);
 	}
 	if (!buf)
 		die(_("can't load object %s"), oid_to_hex(oid));
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 248f8ff5a0..76b723f36d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -724,7 +724,7 @@ static int fsck_loose(const struct object_id *oid, const char *path,
 	struct for_each_loose_cb *data = cb_data;
 	struct object *obj;
 	enum object_type type = OBJ_NONE;
-	unsigned long size;
+	size_t size;
 	void *contents = NULL;
 	int eaten;
 	struct object_info oi = OBJECT_INFO_INIT;
diff --git a/builtin/grep.c b/builtin/grep.c
index 6a09571903..26b85479ca 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -520,7 +520,7 @@ static int grep_submodule(struct grep_opt *opt,
 		enum object_type object_type;
 		struct tree_desc tree;
 		void *data;
-		unsigned long size;
+		size_t size;
 		struct strbuf base = STRBUF_INIT;
 
 		obj_read_lock();
@@ -573,7 +573,7 @@ static int grep_cache(struct grep_opt *opt,
 			enum object_type type;
 			struct tree_desc tree;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(the_repository->objects, &ce->oid,
 					       &type, &size);
@@ -666,7 +666,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 			enum object_type type;
 			struct tree_desc sub;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(the_repository->objects,
 					       &entry.oid, &type, &size);
@@ -730,7 +730,7 @@ static void collect_blob_oids_for_tree(struct repository *repo,
 			enum object_type type;
 			struct tree_desc sub_tree;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(repo->objects, &entry.oid,
 					       &type, &size);
@@ -764,7 +764,7 @@ static void collect_blob_oids_for_treeish(struct grep_opt *opt,
 {
 	struct tree_desc tree;
 	void *data;
-	unsigned long size;
+	size_t size;
 	struct strbuf base = STRBUF_INIT;
 	int len;
 
@@ -841,7 +841,7 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
 	if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
 		struct tree_desc tree;
 		void *data;
-		unsigned long size;
+		size_t size;
 		struct strbuf base;
 		int hit, len;
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 3c4474e681..78da3a6566 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -258,7 +258,7 @@ static unsigned check_object(struct object *obj)
 		return 0;
 
 	if (!(obj->flags & FLAG_CHECKED)) {
-		unsigned long size;
+		size_t size;
 		int type = odb_read_object_info(the_repository->objects,
 						&obj->oid, &size);
 		if (type <= 0)
@@ -905,7 +905,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
 	if (collision_test_needed) {
 		void *has_data;
 		enum object_type has_type;
-		unsigned long has_size;
+		size_t has_size;
 		read_lock();
 		has_type = odb_read_object_info(the_repository->objects, oid, &has_size);
 		if (has_type < 0)
@@ -1515,7 +1515,7 @@ static void fix_unresolved_deltas(struct hashfile *f)
 		struct ref_delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
 		void *data;
-		unsigned long size;
+		size_t size;
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
diff --git a/builtin/log.c b/builtin/log.c
index e464b30af4..d027ce1e0b 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -613,7 +613,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 
 static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = odb_read_object(the_repository->objects, oid, &type, &size);
 	unsigned long offset = 0;
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index e1a22b41b9..bfbd145e97 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -251,7 +251,7 @@ static void expand_objectsize(struct repository *repo, struct strbuf *line,
 			      const enum object_type type, unsigned int padded)
 {
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(repo->objects, oid, &size) < 0)
 			die(_("could not get object info about '%s'"),
 			    oid_to_hex(oid));
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 113e4a960d..7d075bfca2 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -27,7 +27,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
 			      const enum object_type type, unsigned int padded)
 {
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(the_repository->objects, oid, &size) < 0)
 			die(_("could not get object info about '%s'"),
 			    oid_to_hex(oid));
@@ -217,7 +217,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 		return early;
 
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(the_repository->objects, oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index 312b595d1e..49f41e520f 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -69,7 +69,7 @@ static const char *explanation(struct merge_list *entry)
 	return "removed in remote";
 }
 
-static void *result(struct merge_list *entry, unsigned long *size)
+static void *result(struct merge_list *entry, size_t *size)
 {
 	enum object_type type;
 	struct blob *base, *our, *their;
@@ -96,7 +96,7 @@ static void *result(struct merge_list *entry, unsigned long *size)
 			   base, our, their, size);
 }
 
-static void *origin(struct merge_list *entry, unsigned long *size)
+static void *origin(struct merge_list *entry, size_t *size)
 {
 	enum object_type type;
 	while (entry) {
@@ -119,7 +119,7 @@ static int show_outf(void *priv UNUSED, mmbuffer_t *mb, int nbuf)
 
 static void show_diff(struct merge_list *entry)
 {
-	unsigned long size;
+	size_t size;
 	mmfile_t src, dst;
 	xpparam_t xpp;
 	xdemitconf_t xecfg;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index f40264a878..37c17e6beb 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -50,7 +50,7 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type)
 {
 	int ret;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	void *buffer;
 	const struct object_id *repl;
 
diff --git a/builtin/notes.c b/builtin/notes.c
index 9af602bdd7..962df867c8 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -150,7 +150,7 @@ static int list_each_note(const struct object_id *object_oid,
 
 static void copy_obj_to_fd(int fd, const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = odb_read_object(the_repository->objects, oid, &type, &size);
 	if (buf) {
@@ -313,7 +313,7 @@ static int parse_reuse_arg(const struct option *opt, const char *arg, int unset)
 	char *value;
 	struct object_id object;
 	enum object_type type;
-	unsigned long len;
+	size_t len;
 
 	BUG_ON_OPT_NEG(unset);
 
@@ -721,7 +721,7 @@ static int append_edit(int argc, const char **argv, const char *prefix,
 
 	if (note && !edit) {
 		/* Append buf to previous note contents */
-		unsigned long size;
+		size_t size;
 		enum object_type type;
 		struct strbuf buf = STRBUF_INIT;
 		char *prev_buf = odb_read_object(the_repository->objects, note, &type, &size);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index bb372d0b03..6202fe4dca 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -356,14 +356,17 @@ static void *get_delta(struct object_entry *entry)
 	unsigned long size, base_size, delta_size;
 	void *buf, *base_buf, *delta_buf;
 	enum object_type type;
+	size_t size_st = 0, base_size_st = 0;
 
 	buf = odb_read_object(the_repository->objects, &entry->idx.oid,
-			      &type, &size);
+			      &type, &size_st);
+	size = cast_size_t_to_ulong(size_st);
 	if (!buf)
 		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
 	base_buf = odb_read_object(the_repository->objects,
 				   &DELTA(entry)->idx.oid, &type,
-				   &base_size);
+				   &base_size_st);
+	base_size = cast_size_t_to_ulong(base_size_st);
 	if (!base_buf)
 		die("unable to read %s",
 		    oid_to_hex(&DELTA(entry)->idx.oid));
@@ -528,9 +531,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 			type = st->type;
 			size = st->size;
 		} else {
+			size_t size_st = 0;
 			buf = odb_read_object(the_repository->objects,
 					      &entry->idx.oid, &type,
-					      &size);
+					      &size_st);
+			size = cast_size_t_to_ulong(size_st);
 			if (!buf)
 				die(_("unable to read %s"),
 				    oid_to_hex(&entry->idx.oid));
@@ -1935,6 +1940,7 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	struct pbase_tree_cache *ent, *nent;
 	void *data;
 	unsigned long size;
+	size_t size_st = 0;
 	enum object_type type;
 	int neigh;
 	int my_ix = pbase_tree_cache_ix(oid);
@@ -1962,7 +1968,8 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	/* Did not find one.  Either we got a bogus request or
 	 * we need to read and perhaps cache.
 	 */
-	data = odb_read_object(the_repository->objects, oid, &type, &size);
+	data = odb_read_object(the_repository->objects, oid, &type, &size_st);
+	size = cast_size_t_to_ulong(size_st);
 	if (!data)
 		return NULL;
 	if (type != OBJ_TREE) {
@@ -2117,13 +2124,15 @@ static void add_preferred_base(struct object_id *oid)
 	struct pbase_tree *it;
 	void *data;
 	unsigned long size;
+	size_t size_st = 0;
 	struct object_id tree_oid;
 
 	if (window <= num_preferred_base++)
 		return;
 
 	data = odb_read_object_peeled(the_repository->objects, oid,
-				      OBJ_TREE, &size, &tree_oid);
+				      OBJ_TREE, &size_st, &tree_oid);
+	size = cast_size_t_to_ulong(size_st);
 	if (!data)
 		return;
 
@@ -2235,7 +2244,7 @@ static void prefetch_to_pack(uint32_t object_index_start) {
 
 static void check_object(struct object_entry *entry, uint32_t object_index)
 {
-	unsigned long canonical_size;
+	size_t canonical_size;
 	enum object_type type;
 	struct object_info oi = {.typep = &type, .sizep = &canonical_size};
 
@@ -2434,7 +2443,7 @@ static void drop_reused_delta(struct object_entry *entry)
 	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
 	struct object_info oi = OBJECT_INFO_INIT;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 
 	while (*idx) {
 		struct object_entry *oe = &to_pack.objects[*idx - 1];
@@ -2746,7 +2755,7 @@ size_t oe_get_size_slow(struct packing_data *pack,
 	size_t size;
 
 	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
-		unsigned long sz;
+		size_t sz;
 		packing_data_lock(&to_pack);
 		if (odb_read_object_info(the_repository->objects,
 					 &e->idx.oid, &sz) < 0)
@@ -2831,10 +2840,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 
 	/* Load data if not already done */
 	if (!trg->data) {
+		size_t sz_st = 0;
 		packing_data_lock(&to_pack);
 		trg->data = odb_read_object(the_repository->objects,
 					    &trg_entry->idx.oid, &type,
-					    &sz);
+					    &sz_st);
+		sz = cast_size_t_to_ulong(sz_st);
 		packing_data_unlock(&to_pack);
 		if (!trg->data)
 			die(_("object %s cannot be read"),
@@ -2846,10 +2857,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		*mem_usage += sz;
 	}
 	if (!src->data) {
+		size_t sz_st = 0;
 		packing_data_lock(&to_pack);
 		src->data = odb_read_object(the_repository->objects,
 					    &src_entry->idx.oid, &type,
-					    &sz);
+					    &sz_st);
+		sz = cast_size_t_to_ulong(sz_st);
 		packing_data_unlock(&to_pack);
 		if (!src->data) {
 			if (src_entry->preferred_base) {
diff --git a/builtin/repo.c b/builtin/repo.c
index 71a5c1c29c..69f3626467 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -784,13 +784,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
 	for (size_t i = 0; i < oids->nr; i++) {
 		struct object_info oi = OBJECT_INFO_INIT;
 		unsigned long inflated;
+		size_t inflated_st = 0;
 		struct commit *commit;
 		struct object *obj;
 		void *content;
 		off_t disk;
 		int eaten;
 
-		oi.sizep = &inflated;
+		oi.sizep = &inflated_st;
 		oi.disk_sizep = &disk;
 		oi.contentp = &content;
 
@@ -798,6 +799,7 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
 						  OBJECT_INFO_SKIP_FETCH_OBJECT |
 						  OBJECT_INFO_QUICK) < 0)
 			continue;
+		inflated = cast_size_t_to_ulong(inflated_st);
 
 		obj = parse_object_buffer(the_repository, &oids->oid[i], type,
 					  inflated, content, &eaten);
diff --git a/builtin/tag.c b/builtin/tag.c
index d51c2e3349..06c125b53c 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -238,7 +238,7 @@ static int git_tag_config(const char *var, const char *value,
 
 static void write_tag_body(int fd, const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf, *sp, *orig;
 	struct strbuf payload = STRBUF_INIT;
@@ -388,7 +388,7 @@ static void create_reflog_msg(const struct object_id *oid, struct strbuf *sb)
 	enum object_type type;
 	struct commit *c;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int subject_len = 0;
 	const char *subject_start;
 
diff --git a/builtin/unpack-file.c b/builtin/unpack-file.c
index 87877a9fab..387389ed49 100644
--- a/builtin/unpack-file.c
+++ b/builtin/unpack-file.c
@@ -12,7 +12,7 @@ static char *create_temp_file(struct object_id *oid)
 	static char path[50];
 	void *buf;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	int fd;
 
 	buf = odb_read_object(the_repository->objects, oid, &type, &size);
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index e7a50c493c..f3849bb654 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -231,7 +231,7 @@ static int check_object(struct object *obj, enum object_type type,
 		die("object type mismatch");
 
 	if (!(obj->flags & FLAG_OPEN)) {
-		unsigned long size;
+		size_t size;
 		int type = odb_read_object_info(the_repository->objects, &obj->oid, &size);
 		if (type != obj->type || type <= 0)
 			die("object of unexpected type");
@@ -436,6 +436,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 {
 	void *delta_data, *base;
 	unsigned long base_size;
+	size_t base_size_st = 0;
 	struct object_id base_oid;
 
 	if (type == OBJ_REF_DELTA) {
@@ -512,7 +513,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		return;
 
 	base = odb_read_object(the_repository->objects, &base_oid,
-			       &type, &base_size);
+			       &type, &base_size_st);
+	base_size = cast_size_t_to_ulong(base_size_st);
 	if (!base) {
 		error("failed to read delta-pack base object %s",
 		      oid_to_hex(&base_oid));
diff --git a/bundle.c b/bundle.c
index 42327f9739..fd2db2c837 100644
--- a/bundle.c
+++ b/bundle.c
@@ -296,7 +296,7 @@ int list_bundle_refs(struct bundle_header *header, int argc, const char **argv)
 
 static int is_tag_in_date_range(struct object *tag, struct rev_info *revs)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = NULL, *line, *lineend;
 	timestamp_t date;
diff --git a/combine-diff.c b/combine-diff.c
index b799862068..3ce71db8bb 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -325,7 +325,9 @@ static char *grab_blob(struct repository *r,
 		*size = fill_textconv(r, textconv, df, &blob);
 		free_filespec(df);
 	} else {
-		blob = odb_read_object(r->objects, oid, &type, size);
+		size_t size_st = 0;
+		blob = odb_read_object(r->objects, oid, &type, &size_st);
+		*size = cast_size_t_to_ulong(size_st);
 		if (!blob)
 			die(_("unable to read %s"), oid_to_hex(oid));
 		if (type != OBJ_BLOB)
diff --git a/commit.c b/commit.c
index fd8723502e..7950effc58 100644
--- a/commit.c
+++ b/commit.c
@@ -395,7 +395,7 @@ const void *repo_get_commit_buffer(struct repository *r,
 	const void *ret = get_cached_commit_buffer(r, commit, sizep);
 	if (!ret) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		ret = odb_read_object(r->objects, &commit->object.oid, &type, &size);
 		if (!ret)
 			die("cannot read commit object %s",
@@ -404,7 +404,7 @@ const void *repo_get_commit_buffer(struct repository *r,
 			die("expected commit for %s, got %s",
 			    oid_to_hex(&commit->object.oid), type_name(type));
 		if (sizep)
-			*sizep = size;
+			*sizep = cast_size_t_to_ulong(size);
 	}
 	return ret;
 }
@@ -437,7 +437,7 @@ static inline void set_commit_tree(struct commit *c, struct tree *t)
 static void load_tree_from_commit_contents(struct repository *r, struct commit *commit)
 {
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	char *buf;
 	const char *p;
 	struct object_id tree_oid;
@@ -604,7 +604,7 @@ int repo_parse_commit_internal(struct repository *r,
 {
 	enum object_type type;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 	struct object_info oi = {
 		.typep = &type,
 		.sizep = &size,
@@ -1313,7 +1313,7 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h
 	struct merge_remote_desc *desc;
 	struct commit_extra_header *mergetag;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	struct strbuf payload = STRBUF_INIT;
 	struct strbuf signature = STRBUF_INIT;
diff --git a/config.c b/config.c
index a1b92fe083..21b231052c 100644
--- a/config.c
+++ b/config.c
@@ -1442,7 +1442,7 @@ int git_config_from_blob_oid(config_fn_t fn,
 {
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	buf = odb_read_object(repo->objects, oid, &type, &size);
diff --git a/diff.c b/diff.c
index 5a584fa1d5..816b89dc6c 100644
--- a/diff.c
+++ b/diff.c
@@ -4594,8 +4594,9 @@ int diff_populate_filespec(struct repository *r,
 		}
 	}
 	else {
+		size_t size_st = 0;
 		struct object_info info = {
-			.sizep = &s->size
+			.sizep = &size_st
 		};
 
 		if (!(size_only || check_binary))
@@ -4617,6 +4618,7 @@ int diff_populate_filespec(struct repository *r,
 			die("unable to read %s", oid_to_hex(&s->oid));
 
 object_read:
+		s->size = cast_size_t_to_ulong(size_st);
 		if (size_only || check_binary) {
 			if (size_only)
 				return 0;
@@ -4631,6 +4633,7 @@ object_read:
 			if (odb_read_object_info_extended(r->objects, &s->oid, &info,
 							  OBJECT_INFO_LOOKUP_REPLACE))
 				die("unable to read %s", oid_to_hex(&s->oid));
+			s->size = cast_size_t_to_ulong(size_st);
 		}
 		s->should_free = 1;
 	}
diff --git a/dir.c b/dir.c
index 33c81c256e..b6764d98a7 100644
--- a/dir.c
+++ b/dir.c
@@ -324,7 +324,7 @@ static int do_read_blob(const struct object_id *oid, struct oid_stat *oid_stat,
 			size_t *size_out, char **data_out)
 {
 	enum object_type type;
-	unsigned long sz;
+	size_t sz;
 	char *data;
 
 	*size_out = 0;
diff --git a/entry.c b/entry.c
index 7817aee362..c444fe5a10 100644
--- a/entry.c
+++ b/entry.c
@@ -92,11 +92,9 @@ static int create_file(const char *path, unsigned int mode)
 void *read_blob_entry(const struct cache_entry *ce, size_t *size)
 {
 	enum object_type type;
-	unsigned long ul;
 	void *blob_data = odb_read_object(the_repository->objects, &ce->oid,
-					  &type, &ul);
+					  &type, size);
 
-	*size = ul;
 	if (blob_data) {
 		if (type == OBJ_BLOB)
 			return blob_data;
diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c
index 45d8b20e97..14441f23ae 100644
--- a/fmt-merge-msg.c
+++ b/fmt-merge-msg.c
@@ -528,11 +528,11 @@ static void fmt_merge_msg_sigs(struct strbuf *out)
 	for (i = 0; i < origins.nr; i++) {
 		struct object_id *oid = origins.items[i].util;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object(the_repository->objects, oid,
 					    &type, &size);
 		char *origbuf = buf;
-		unsigned long len = size;
+		size_t len = size;
 		struct signature_check sigc = { NULL };
 		struct strbuf payload = STRBUF_INIT, sig = STRBUF_INIT;
 
diff --git a/fsck.c b/fsck.c
index b72200c352..82c2002f4a 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1328,7 +1328,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
 	oidset_iter_init(blobs_found, &iter);
 	while ((oid = oidset_iter_next(&iter))) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf;
 
 		if (oidset_contains(blobs_done, oid))
diff --git a/grep.c b/grep.c
index a54e5d86a9..1d75d31421 100644
--- a/grep.c
+++ b/grep.c
@@ -1931,9 +1931,11 @@ void grep_source_clear_data(struct grep_source *gs)
 static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
+	size_t size_st = 0;
 
 	gs->buf = odb_read_object(gs->repo->objects, gs->identifier,
-				  &type, &gs->size);
+				  &type, &size_st);
+	gs->size = cast_size_t_to_ulong(size_st);
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,
diff --git a/http-push.c b/http-push.c
index 520d6c3b6a..c61d9f7e02 100644
--- a/http-push.c
+++ b/http-push.c
@@ -365,7 +365,7 @@ static void start_put(struct transfer_request *request)
 	enum object_type type;
 	char hdr[50];
 	void *unpacked;
-	unsigned long len;
+	size_t len;
 	int hdrlen;
 	ssize_t size;
 	git_zstream stream;
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 78316e7f90..c912ff3079 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -280,7 +280,7 @@ static enum list_objects_filter_result filter_blobs_limit(
 	void *filter_data_)
 {
 	struct filter_blobs_limit_data *filter_data = filter_data_;
-	unsigned long object_length;
+	size_t object_length;
 	enum object_type t;
 
 	switch (filter_situation) {
diff --git a/mailmap.c b/mailmap.c
index 3b2691781d..72b639e602 100644
--- a/mailmap.c
+++ b/mailmap.c
@@ -186,7 +186,7 @@ int read_mailmap_blob(struct repository *repo, struct string_list *map,
 {
 	struct object_id oid;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	if (!name)
diff --git a/match-trees.c b/match-trees.c
index 4216933d06..2a43c0fa1a 100644
--- a/match-trees.c
+++ b/match-trees.c
@@ -61,7 +61,7 @@ static void *fill_tree_desc_strict(struct repository *r,
 {
 	void *buffer;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 
 	buffer = odb_read_object(r->objects, hash, &type, &size);
 	if (!buffer)
@@ -186,7 +186,7 @@ static int splice_tree(struct repository *r,
 	char *subpath;
 	int toplen;
 	char *buf;
-	unsigned long sz;
+	size_t sz;
 	struct tree_desc desc;
 	unsigned char *rewrite_here;
 	const struct object_id *rewrite_with;
diff --git a/merge-blobs.c b/merge-blobs.c
index 6fc2799417..16a75bd1e3 100644
--- a/merge-blobs.c
+++ b/merge-blobs.c
@@ -9,7 +9,7 @@
 static int fill_mmfile_blob(mmfile_t *f, struct blob *obj)
 {
 	void *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	buf = odb_read_object(the_repository->objects, &obj->object.oid,
@@ -35,7 +35,7 @@ static void *three_way_filemerge(struct index_state *istate,
 				 mmfile_t *base,
 				 mmfile_t *our,
 				 mmfile_t *their,
-				 unsigned long *size)
+				 size_t *size)
 {
 	enum ll_merge_result merge_status;
 	mmbuffer_t res;
@@ -61,7 +61,7 @@ static void *three_way_filemerge(struct index_state *istate,
 
 void *merge_blobs(struct index_state *istate, const char *path,
 		  struct blob *base, struct blob *our,
-		  struct blob *their, unsigned long *size)
+		  struct blob *their, size_t *size)
 {
 	void *res = NULL;
 	mmfile_t f1, f2, common;
diff --git a/merge-blobs.h b/merge-blobs.h
index 13cf9669e5..5797517a06 100644
--- a/merge-blobs.h
+++ b/merge-blobs.h
@@ -6,6 +6,6 @@ struct index_state;
 
 void *merge_blobs(struct index_state *, const char *,
 		  struct blob *, struct blob *,
-		  struct blob *, unsigned long *);
+		  struct blob *, size_t *);
 
 #endif /* MERGE_BLOBS_H */
diff --git a/merge-ort.c b/merge-ort.c
index 544be9e466..4f6273bd51 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -3716,7 +3716,7 @@ static int read_oid_strbuf(struct merge_options *opt,
 {
 	void *buf;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	buf = odb_read_object(opt->repo->objects, oid, &type, &size);
 	if (!buf) {
 		path_msg(opt, ERROR_OBJECT_READ_FAILED, 0,
diff --git a/notes-cache.c b/notes-cache.c
index bf5bb1f6c1..74cef802bd 100644
--- a/notes-cache.c
+++ b/notes-cache.c
@@ -82,7 +82,7 @@ char *notes_cache_get(struct notes_cache *c, struct object_id *key_oid,
 	const struct object_id *value_oid;
 	enum object_type type;
 	char *value;
-	unsigned long size;
+	size_t size;
 
 	value_oid = get_note(&c->tree, key_oid);
 	if (!value_oid)
diff --git a/notes-merge.c b/notes-merge.c
index b9322abbcb..118cad2518 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -339,7 +339,7 @@ static void write_note_to_worktree(const struct object_id *obj,
 				   const struct object_id *note)
 {
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	void *buf = odb_read_object(the_repository->objects, note, &type, &size);
 
 	if (!buf)
diff --git a/notes.c b/notes.c
index 8f315e2a00..ec9c2cb150 100644
--- a/notes.c
+++ b/notes.c
@@ -811,7 +811,8 @@ int combine_notes_concatenate(struct object_id *cur_oid,
 			      const struct object_id *new_oid)
 {
 	char *cur_msg = NULL, *new_msg = NULL, *buf;
-	unsigned long cur_len, new_len, buf_len;
+	unsigned long buf_len;
+	size_t cur_len, new_len;
 	enum object_type cur_type, new_type;
 	int ret;
 
@@ -875,7 +876,7 @@ static int string_list_add_note_lines(struct string_list *list,
 				      const struct object_id *oid)
 {
 	char *data;
-	unsigned long len;
+	size_t len;
 	enum object_type t;
 
 	if (is_null_oid(oid))
@@ -1282,7 +1283,8 @@ static void format_note(struct notes_tree *t, const struct object_id *object_oid
 	static const char utf8[] = "utf-8";
 	const struct object_id *oid;
 	char *msg, *msg_p;
-	unsigned long linelen, msglen;
+	unsigned long linelen;
+	size_t msglen;
 	enum object_type type;
 
 	if (!t)
diff --git a/object-file.c b/object-file.c
index 90f995d000..a81d50c305 100644
--- a/object-file.c
+++ b/object-file.c
@@ -381,7 +381,7 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
 	}
 
 	if (oi->sizep)
-		*oi->sizep = cast_size_t_to_ulong(size);
+		*oi->sizep = size;
 
 	/*
 	 * The length must be followed by a zero byte
@@ -409,7 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
 	void *map = NULL;
 	git_zstream stream, *stream_to_end = NULL;
 	char hdr[MAX_HEADER_LEN];
-	unsigned long size_scratch;
+	size_t size_scratch;
 	enum object_type type_scratch;
 	struct stat st;
 
@@ -1222,7 +1222,7 @@ int force_object_loose(struct odb_source *source,
 {
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	void *buf;
-	unsigned long len;
+	size_t len;
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct object_id compat_oid;
 	enum object_type type;
@@ -2126,7 +2126,7 @@ int read_loose_object(struct repository *repo,
 	unsigned long mapsize;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
-	unsigned long *size = oi->sizep;
+	size_t *size = oi->sizep;
 
 	fd = git_open(path);
 	if (fd >= 0)
@@ -2302,7 +2302,6 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct odb_loose_read_stream *st;
 	unsigned long mapsize;
-	unsigned long size_ul;
 	void *mapped;
 
 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
@@ -2326,18 +2325,11 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 		goto error;
 	}
 
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * st->base.size is size_t (64-bit). Use temporary variable.
-	 * Note: loose objects >4GB would still truncate here, but such
-	 * large loose objects are uncommon (they'd normally be packed).
-	 */
-	oi.sizep = &size_ul;
+	oi.sizep = &st->base.size;
 	oi.typep = &st->base.type;
 
 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
 		goto error;
-	st->base.size = size_ul;
 
 	st->mapped = mapped;
 	st->mapsize = mapsize;
diff --git a/object.c b/object.c
index 465902ecc6..23b84aa7e2 100644
--- a/object.c
+++ b/object.c
@@ -325,7 +325,7 @@ struct object *parse_object_with_flags(struct repository *r,
 {
 	int skip_hash = !!(flags & PARSE_OBJECT_SKIP_HASH_CHECK);
 	int discard_tree = !!(flags & PARSE_OBJECT_DISCARD_TREE);
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	int eaten;
 	const struct object_id *repl = lookup_replace_object(r, oid);
diff --git a/odb.c b/odb.c
index 965ef68e4e..7d555be09f 100644
--- a/odb.c
+++ b/odb.c
@@ -625,7 +625,7 @@ static int oid_object_info_convert(struct repository *r,
 	enum object_type type;
 	struct object_id oid, delta_base_oid;
 	struct object_info new_oi, *oi;
-	unsigned long size;
+	size_t size;
 	void *content;
 	int ret;
 
@@ -716,7 +716,7 @@ int odb_read_object_info_extended(struct object_database *odb,
 /* returns enum object_type or negative */
 int odb_read_object_info(struct object_database *odb,
 			 const struct object_id *oid,
-			 unsigned long *sizep)
+			 size_t *sizep)
 {
 	enum object_type type;
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -730,7 +730,7 @@ int odb_read_object_info(struct object_database *odb,
 }
 
 int odb_pretend_object(struct object_database *odb,
-		       void *buf, unsigned long len, enum object_type type,
+		       void *buf, size_t len, enum object_type type,
 		       struct object_id *oid)
 {
 	hash_object_file(odb->repo->hash_algo, buf, len, type, oid);
@@ -744,7 +744,7 @@ int odb_pretend_object(struct object_database *odb,
 void *odb_read_object(struct object_database *odb,
 		      const struct object_id *oid,
 		      enum object_type *type,
-		      unsigned long *size)
+		      size_t *size)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	unsigned flags = OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE;
@@ -762,12 +762,12 @@ void *odb_read_object(struct object_database *odb,
 void *odb_read_object_peeled(struct object_database *odb,
 			     const struct object_id *oid,
 			     enum object_type required_type,
-			     unsigned long *size,
+			     size_t *size,
 			     struct object_id *actual_oid_return)
 {
 	enum object_type type;
 	void *buffer;
-	unsigned long isize;
+	size_t isize;
 	struct object_id actual_oid;
 
 	oidcpy(&actual_oid, oid);
diff --git a/odb.h b/odb.h
index 73553ed5a7..e2f0bbad25 100644
--- a/odb.h
+++ b/odb.h
@@ -228,12 +228,12 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb,
 void *odb_read_object(struct object_database *odb,
 		      const struct object_id *oid,
 		      enum object_type *type,
-		      unsigned long *size);
+		      size_t *size);
 
 void *odb_read_object_peeled(struct object_database *odb,
 			     const struct object_id *oid,
 			     enum object_type required_type,
-			     unsigned long *size,
+			     size_t *size,
 			     struct object_id *oid_ret);
 
 /*
@@ -245,13 +245,13 @@ void *odb_read_object_peeled(struct object_database *odb,
  * that reference it.
  */
 int odb_pretend_object(struct object_database *odb,
-		       void *buf, unsigned long len, enum object_type type,
+		       void *buf, size_t len, enum object_type type,
 		       struct object_id *oid);
 
 struct object_info {
 	/* Request */
 	enum object_type *typep;
-	unsigned long *sizep;
+	size_t *sizep;
 	off_t *disk_sizep;
 	struct object_id *delta_base_oid;
 	void **contentp;
@@ -356,7 +356,7 @@ int odb_read_object_info_extended(struct object_database *odb,
  */
 int odb_read_object_info(struct object_database *odb,
 			 const struct object_id *oid,
-			 unsigned long *sizep);
+			 size_t *sizep);
 
 enum odb_has_object_flags {
 	/* Retry packed storage after checking packed and loose storage */
diff --git a/odb/streaming.c b/odb/streaming.c
index 7602a8d5d8..20531e864c 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -157,26 +157,15 @@ static int open_istream_incore(struct odb_read_stream **out,
 		.base.read = read_istream_incore,
 	};
 	struct odb_incore_read_stream *st;
-	unsigned long size_ul;
 	int ret;
 
 	oi.typep = &stream.base.type;
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * stream.base.size is size_t (64-bit). We use a temporary variable
-	 * because the types are incompatible. Note: this path still truncates
-	 * for >4GB objects, but large objects should use pack streaming
-	 * (packfile_store_read_object_stream) which handles size_t properly.
-	 * This incore fallback is only used for small objects or when pack
-	 * streaming is unavailable.
-	 */
-	oi.sizep = &size_ul;
+	oi.sizep = &stream.base.size;
 	oi.contentp = (void **)&stream.buf;
 	ret = odb_read_object_info_extended(odb, oid, &oi,
 					    OBJECT_INFO_DIE_IF_CORRUPT);
 	if (ret)
 		return ret;
-	stream.base.size = size_ul;
 
 	CALLOC_ARRAY(st, 1);
 	*st = stream;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index f9af8a96bd..e8a82945cc 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1856,7 +1856,7 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
 static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 				     uint32_t pos)
 {
-	unsigned long size;
+	size_t size;
 	struct object_info oi = OBJECT_INFO_INIT;
 
 	oi.sizep = &size;
@@ -1891,7 +1891,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 			die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
 	}
 
-	return size;
+	return cast_size_t_to_ulong(size);
 }
 
 static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
diff --git a/packfile.c b/packfile.c
index c174982d10..78c389e6f3 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1607,13 +1607,10 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 	 * a "real" type later if the caller is interested.
 	 */
 	if (oi->contentp) {
-		size_t size_st = 0;
 		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
-						      &size_st, &type);
+						      oi->sizep, &type);
 		if (!*oi->contentp)
 			type = OBJ_BAD;
-		else if (oi->sizep)
-			*oi->sizep = cast_size_t_to_ulong(size_st);
 	} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
 		type = unpack_object_header(p, &w_curs, &curpos, &size);
 	}
@@ -1633,7 +1630,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 				goto out;
 			}
 		}
-		*oi->sizep = (unsigned long)size;
+		*oi->sizep = size;
 	}
 
 	if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
@@ -1919,7 +1916,6 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			struct object_id base_oid;
 			if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
 				struct object_info oi = OBJECT_INFO_INIT;
-				unsigned long bsz_ul = 0;
 
 				nth_packed_object_id(&base_oid, p,
 						     pack_pos_to_index(p, pos));
@@ -1930,13 +1926,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 				mark_bad_packed_object(p, &base_oid);
 
 				oi.typep = &type;
-				oi.sizep = &bsz_ul;
+				oi.sizep = &base_size;
 				oi.contentp = &base;
 				if (odb_read_object_info_extended(r->objects, &base_oid,
 								  &oi, 0) < 0)
 					base = NULL;
-				else
-					base_size = bsz_ul;
 
 				external_base = base;
 			}
diff --git a/path-walk.c b/path-walk.c
index 94ff90bd15..edc8e736d7 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -368,7 +368,7 @@ static int walk_path(struct path_walk_context *ctx,
 		struct oid_array filtered = OID_ARRAY_INIT;
 
 		for (size_t i = 0; i < list->oids.nr; i++) {
-			unsigned long size;
+			size_t size;
 
 			if (odb_read_object_info(ctx->repo->objects,
 						 &list->oids.oid[i],
diff --git a/protocol-caps.c b/protocol-caps.c
index 35072ed60b..8858ea4489 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -50,7 +50,7 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 	for_each_string_list_item (item, oid_str_list) {
 		const char *oid_str = item->string;
 		struct object_id oid;
-		unsigned long object_size;
+		size_t object_size;
 
 		if (get_oid_hex_algop(oid_str, &oid, r->hash_algo) < 0) {
 			packet_writer_error(
@@ -66,7 +66,8 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 			if (odb_read_object_info(r->objects, &oid, &object_size) < 0) {
 				strbuf_addstr(&send_buffer, " ");
 			} else {
-				strbuf_addf(&send_buffer, " %lu", object_size);
+				strbuf_addf(&send_buffer, " %"PRIuMAX,
+					    (uintmax_t)object_size);
 			}
 		}
 
diff --git a/read-cache.c b/read-cache.c
index 21829102ae..21ca58beea 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -250,7 +250,7 @@ static int ce_compare_link(const struct cache_entry *ce, size_t expected_size)
 {
 	int match = -1;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	struct strbuf sb = STRBUF_INIT;
 
@@ -3462,7 +3462,7 @@ void *read_blob_data_from_index(struct index_state *istate,
 				const char *path, unsigned long *size)
 {
 	int pos, len;
-	unsigned long sz;
+	size_t sz;
 	enum object_type type;
 	void *data;
 
@@ -3490,7 +3490,7 @@ void *read_blob_data_from_index(struct index_state *istate,
 		return NULL;
 	}
 	if (size)
-		*size = sz;
+		*size = cast_size_t_to_ulong(sz);
 	return data;
 }
 
diff --git a/ref-filter.c b/ref-filter.c
index 1da4c0e60d..8ba91c72a1 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -86,7 +86,7 @@ struct ref_trailer_buf {
 static struct expand_data {
 	struct object_id oid;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	off_t disk_size;
 	struct object_id delta_base_oid;
 	void *content;
diff --git a/reflog.c b/reflog.c
index 82337078d0..04edbe5670 100644
--- a/reflog.c
+++ b/reflog.c
@@ -154,7 +154,7 @@ static int tree_is_complete(const struct object_id *oid)
 
 	if (!tree->buffer) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		void *data = odb_read_object(the_repository->objects, oid,
 					     &type, &size);
 		if (!data) {
diff --git a/rerere.c b/rerere.c
index 0296700f9f..068321b24f 100644
--- a/rerere.c
+++ b/rerere.c
@@ -990,7 +990,7 @@ static int handle_cache(struct index_state *istate,
 
 	while (pos < istate->cache_nr) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 
 		ce = istate->cache[pos++];
 		if (ce_namelen(ce) != len || memcmp(ce->name, path, len))
diff --git a/submodule-config.c b/submodule-config.c
index a81897b4e0..f75997402a 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -694,7 +694,7 @@ static const struct submodule *config_from(struct submodule_cache *cache,
 		enum lookup_type lookup_type)
 {
 	struct strbuf rev = STRBUF_INIT;
-	unsigned long config_size;
+	size_t config_size;
 	char *config = NULL;
 	struct object_id oid;
 	enum object_type type;
diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c
index c493b75e02..840797cf0d 100644
--- a/t/helper/test-pack-deltas.c
+++ b/t/helper/test-pack-deltas.c
@@ -48,7 +48,8 @@ static void write_ref_delta(struct hashfile *f,
 			    struct object_id *base)
 {
 	unsigned char header[MAX_PACK_OBJECT_HEADER];
-	unsigned long size, base_size, delta_size, compressed_size, hdrlen;
+	unsigned long delta_size, compressed_size, hdrlen;
+	size_t size, base_size;
 	enum object_type type;
 	void *base_buf, *delta_buf;
 	void *buf = odb_read_object(the_repository->objects,
diff --git a/t/helper/test-partial-clone.c b/t/helper/test-partial-clone.c
index a7aab426d0..87c59108e0 100644
--- a/t/helper/test-partial-clone.c
+++ b/t/helper/test-partial-clone.c
@@ -17,7 +17,7 @@ static void object_info(const char *gitdir, const char *oid_hex)
 {
 	struct repository r;
 	struct object_id oid;
-	unsigned long size;
+	size_t size;
 	struct object_info oi = {.sizep = &size};
 	const char *p;
 
diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c
index 482502ef4b..6844bfc37c 100644
--- a/t/unit-tests/u-odb-inmemory.c
+++ b/t/unit-tests/u-odb-inmemory.c
@@ -20,7 +20,7 @@ static void cl_assert_object_info(struct odb_source_inmemory *source,
 				  const char *expected_content)
 {
 	enum object_type actual_type;
-	unsigned long actual_size;
+	size_t actual_size;
 	void *actual_content;
 	struct object_info oi = {
 		.typep = &actual_type,
diff --git a/tag.c b/tag.c
index 2f12e51024..1a00ded6eb 100644
--- a/tag.c
+++ b/tag.c
@@ -49,7 +49,7 @@ int gpg_verify_tag(struct repository *r, const struct object_id *oid,
 {
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	type = odb_read_object_info(r->objects, oid, NULL);
@@ -207,7 +207,7 @@ int parse_tag(struct repository *r, struct tag *item)
 {
 	enum object_type type;
 	void *data;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	if (item->object.parsed)
diff --git a/tree-walk.c b/tree-walk.c
index 7e1b956f27..a67f06b9eb 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -87,7 +87,7 @@ void *fill_tree_descriptor(struct repository *r,
 			   struct tree_desc *desc,
 			   const struct object_id *oid)
 {
-	unsigned long size = 0;
+	size_t size = 0;
 	void *buf = NULL;
 
 	if (oid) {
@@ -610,7 +610,7 @@ int get_tree_entry(struct repository *r,
 {
 	int retval;
 	void *tree;
-	unsigned long size;
+	size_t size;
 	struct object_id root;
 
 	tree = odb_read_object_peeled(r->objects, tree_oid, OBJ_TREE, &size, &root);
@@ -682,7 +682,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 		if (!t.buffer) {
 			void *tree;
 			struct object_id root;
-			unsigned long size;
+			size_t size;
 			tree = odb_read_object_peeled(r->objects, &current_tree_oid,
 						      OBJ_TREE, &size, &root);
 			if (!tree)
@@ -778,6 +778,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 		} else if (S_ISLNK(*mode)) {
 			/* Follow a symlink */
 			unsigned long link_len;
+			size_t link_len_st = 0;
 			size_t len;
 			char *contents, *contents_start;
 			struct dir_state *parent;
@@ -797,7 +798,8 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 
 			contents = odb_read_object(r->objects,
 						   &current_tree_oid, &type,
-						   &link_len);
+						   &link_len_st);
+			link_len = cast_size_t_to_ulong(link_len_st);
 
 			if (!contents)
 				goto done;
diff --git a/tree.c b/tree.c
index d703ab97c8..53f7395e9f 100644
--- a/tree.c
+++ b/tree.c
@@ -188,7 +188,7 @@ int repo_parse_tree_gently(struct repository *r, struct tree *item,
 {
 	 enum object_type type;
 	 void *buffer;
-	 unsigned long size;
+	 size_t size;
 
 	if (item->object.parsed)
 		return 0;
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 5ee2b96d0a..db6938689f 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -179,7 +179,7 @@ int read_mmfile(mmfile_t *ptr, const char *filename)
 void read_mmblob(mmfile_t *ptr, struct object_database *odb,
 		 const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	if (is_null_oid(oid)) {
-- 
gitgitgadget

^ permalink raw reply related

* [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When I started the transition from `unsigned long` to `size_t`, in the
interest of keeping the patches reviewable, I introduced
`cast_size_t_to_ulong()` calls to keep data type narrowing from
silently truncating large object sizes. I also introduced
`*_sz()` variants that would allow most of the
callers to keep using that `unsigned long` that the 90s kindly asked to
be returned.

After the preceding commits, the only places that called the narrow
wrappers either no longer exist or already use the `_sz` form
internally, so the wrappers just narrow values back through
`cast_size_t_to_ulong()` for no reason.

Drop them and rename the `_sz` variants back to the natural names.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 delta.h       | 14 ++------------
 packfile.c    | 28 ++++++++--------------------
 packfile.h    |  2 +-
 patch-delta.c |  4 ++--
 4 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/delta.h b/delta.h
index bb149dc82b..eb5c6d2fdb 100644
--- a/delta.h
+++ b/delta.h
@@ -86,11 +86,8 @@ void *patch_delta(const void *src_buf, size_t src_size,
  * This must be called twice on the delta data buffer, first to get the
  * expected source buffer size, and again to get the target buffer size.
  */
-/*
- * Size_t variant that doesn't truncate - use for >4GB objects on Windows.
- */
-static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
-					   const unsigned char *top)
+static inline size_t get_delta_hdr_size(const unsigned char **datap,
+					const unsigned char *top)
 {
 	const unsigned char *data = *datap;
 	size_t cmd, size = 0;
@@ -104,11 +101,4 @@ static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
 	return size;
 }
 
-static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
-					       const unsigned char *top)
-{
-	size_t size = get_delta_hdr_size_sz(datap, top);
-	return cast_size_t_to_ulong(size);
-}
-
 #endif
diff --git a/packfile.c b/packfile.c
index dab0a9b16d..c174982d10 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1164,11 +1164,12 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
 }
 
 /*
- * Size_t variant for >4GB delta results on Windows.
+ * Read a delta object's header at curpos in p (already inflated as needed)
+ * and return the size of the result object (the post-application target).
  */
-static size_t get_size_from_delta_sz(struct packed_git *p,
-				     struct pack_window **w_curs,
-				     off_t curpos)
+size_t get_size_from_delta(struct packed_git *p,
+			   struct pack_window **w_curs,
+			   off_t curpos)
 {
 	const unsigned char *data;
 	unsigned char delta_head[20], *in;
@@ -1215,18 +1216,10 @@ static size_t get_size_from_delta_sz(struct packed_git *p,
 	data = delta_head;
 
 	/* ignore base size */
-	get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
+	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 
 	/* Read the result size */
-	return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
-}
-
-unsigned long get_size_from_delta(struct packed_git *p,
-				  struct pack_window **w_curs,
-				  off_t curpos)
-{
-	size_t size = get_size_from_delta_sz(p, w_curs, curpos);
-	return cast_size_t_to_ulong(size);
+	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 }
 
 int unpack_object_header(struct packed_git *p,
@@ -1634,12 +1627,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 				ret = -1;
 				goto out;
 			}
-			/*
-			 * Use size_t variant to avoid die() on >4GB deltas.
-			 * oi->sizep is unsigned long, so truncation may occur,
-			 * but streaming code uses its own size_t tracking.
-			 */
-			size = get_size_from_delta_sz(p, &w_curs, tmp_pos);
+			size = get_size_from_delta(p, &w_curs, tmp_pos);
 			if (size == 0) {
 				ret = -1;
 				goto out;
diff --git a/packfile.h b/packfile.h
index 0b5ae3f9fc..bd4494906d 100644
--- a/packfile.h
+++ b/packfile.h
@@ -458,7 +458,7 @@ int is_pack_valid(struct packed_git *);
 void *unpack_entry(struct repository *r, struct packed_git *, off_t,
 		   enum object_type *, size_t *);
 unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
-unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
+size_t get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
 off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
 		     off_t *curpos, enum object_type type,
diff --git a/patch-delta.c b/patch-delta.c
index 44cda97994..42199fa956 100644
--- a/patch-delta.c
+++ b/patch-delta.c
@@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, size_t src_size,
 	top = (const unsigned char *) delta_buf + delta_size;
 
 	/* make sure the orig file size matches what we expect */
-	size = get_delta_hdr_size_sz(&data, top);
+	size = get_delta_hdr_size(&data, top);
 	if (size != src_size)
 		return NULL;
 
 	/* now the result size */
-	size = get_delta_hdr_size_sz(&data, top);
+	size = get_delta_hdr_size(&data, top);
 	dst_buf = xmallocz(size);
 
 	out = dst_buf;
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH 5/7] pack-objects: use size_t for in-core object sizes
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

`pack-objects` stores per-entry object sizes in either the 31-bit
`size_` member of the `struct object_entry` or, when the value does not
fit, the `pack->delta_size[]` spill array.  The accessors (`oe_size`,
`oe_delta_size`, `oe_get_size_slow`, `oe_size_*_than`) and the setters
(`oe_set_size`, `oe_set_delta_size`) used `unsigned long` for the spill
type, which on Windows means the spill silently caps at 4 GiB per entry.
That is what made `upload-pack` die with "object too large to read on
this platform" while serving the >4 GiB blob in tests 5 and 6 of `t5608`
run with `GIT_TEST_CLONE_2GB` set.

Widen them all to `size_t` (including `pack->delta_size`) and drop the
three `cast_size_t_to_ulong()` calls in `check_object()` that guarded
`in_pack_size`.  The two `SET_SIZE(entry, canonical_size)` calls in the
same function stay cast-free as before, since `canonical_size` is still
`unsigned long` until a later commit widens `object_info::sizep`.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/pack-objects.c | 35 ++++++++++++++++++-----------------
 pack-objects.h         |  2 +-
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 975f04d699..bb372d0b03 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -66,8 +66,8 @@ static inline struct object_entry *oe_delta(
 		return &pack->objects[e->delta_idx - 1];
 }
 
-static inline unsigned long oe_delta_size(struct packing_data *pack,
-					  const struct object_entry *e)
+static inline size_t oe_delta_size(struct packing_data *pack,
+				   const struct object_entry *e)
 {
 	if (e->delta_size_valid)
 		return e->delta_size_;
@@ -83,11 +83,11 @@ static inline unsigned long oe_delta_size(struct packing_data *pack,
 	return pack->delta_size[e - pack->objects];
 }
 
-unsigned long oe_get_size_slow(struct packing_data *pack,
-			       const struct object_entry *e);
+size_t oe_get_size_slow(struct packing_data *pack,
+			const struct object_entry *e);
 
-static inline unsigned long oe_size(struct packing_data *pack,
-				    const struct object_entry *e)
+static inline size_t oe_size(struct packing_data *pack,
+			     const struct object_entry *e)
 {
 	if (e->size_valid)
 		return e->size_;
@@ -145,7 +145,7 @@ static inline void oe_set_delta_sibling(struct packing_data *pack,
 
 static inline void oe_set_size(struct packing_data *pack,
 			       struct object_entry *e,
-			       unsigned long size)
+			       size_t size)
 {
 	if (size < pack->oe_size_limit) {
 		e->size_ = size;
@@ -159,7 +159,7 @@ static inline void oe_set_size(struct packing_data *pack,
 
 static inline void oe_set_delta_size(struct packing_data *pack,
 				     struct object_entry *e,
-				     unsigned long size)
+				     size_t size)
 {
 	if (size < pack->oe_delta_size_limit) {
 		e->delta_size_ = size;
@@ -496,7 +496,7 @@ static void copy_pack_data(struct hashfile *f,
 
 static inline int oe_size_greater_than(struct packing_data *pack,
 				       const struct object_entry *lhs,
-				       unsigned long rhs)
+				       size_t rhs)
 {
 	if (lhs->size_valid)
 		return lhs->size_ > rhs;
@@ -2277,7 +2277,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 		default:
 			/* Not a delta hence we've already got all we need. */
 			oe_set_type(entry, entry->in_pack_type);
-			SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size));
+			SET_SIZE(entry, in_pack_size);
 			entry->in_pack_header_size = used;
 			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
 				goto give_up;
@@ -2331,8 +2331,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 		if (have_base &&
 		    can_reuse_delta(&base_ref, entry, &base_entry)) {
 			oe_set_type(entry, entry->in_pack_type);
-			SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size)); /* delta size */
-			SET_DELTA_SIZE(entry, cast_size_t_to_ulong(in_pack_size));
+			SET_SIZE(entry, in_pack_size); /* delta size */
+			SET_DELTA_SIZE(entry, in_pack_size);
 
 			if (base_entry) {
 				SET_DELTA(entry, base_entry);
@@ -2355,7 +2355,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 			 * object size from the delta header.
 			 */
 			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
-			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
+			canonical_size = get_size_from_delta(p, &w_curs,
+							     delta_pos);
 			if (canonical_size == 0)
 				goto give_up;
 			SET_SIZE(entry, canonical_size);
@@ -2711,7 +2712,7 @@ static pthread_mutex_t progress_mutex;
 
 static inline int oe_size_less_than(struct packing_data *pack,
 				    const struct object_entry *lhs,
-				    unsigned long rhs)
+				    size_t rhs)
 {
 	if (lhs->size_valid)
 		return lhs->size_ < rhs;
@@ -2734,8 +2735,8 @@ static inline void oe_set_tree_depth(struct packing_data *pack,
  * reconstruction (so non-deltas are true object sizes, but deltas
  * return the size of the delta data).
  */
-unsigned long oe_get_size_slow(struct packing_data *pack,
-			       const struct object_entry *e)
+size_t oe_get_size_slow(struct packing_data *pack,
+			const struct object_entry *e)
 {
 	struct packed_git *p;
 	struct pack_window *w_curs;
@@ -2769,7 +2770,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
 
 	unuse_pack(&w_curs);
 	packing_data_unlock(&to_pack);
-	return cast_size_t_to_ulong(size);
+	return size;
 }
 
 static int try_delta(struct unpacked *trg, struct unpacked *src,
diff --git a/pack-objects.h b/pack-objects.h
index 83299d4732..e97e84ddcb 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -141,7 +141,7 @@ struct packing_data {
 	uint32_t index_size;
 
 	unsigned int *in_pack_pos;
-	unsigned long *delta_size;
+	size_t *delta_size;
 
 	/*
 	 * Only one of these can be non-NULL and they have different
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

The topic `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects paths to `size_t` but deliberately stopped
at the in-memory `unpack_entry()` cascade, which still hands back the
unpacked size through `unsigned long *`.  On Windows that boundary
truncates above 4 GiB because that data type is only 32 bits wide on
that platform.

Widen the code path, with the exception of
`packed_object_info_with_index_pos()`: it cannot yet pass `oi->sizep`
directly because the field is still `unsigned long *`; bridge it with a
`size_t` temporary that narrows back, and let a later commit drop the
bridge once the field is wide too. `gfi_unpack_entry()` keeps its narrow
signature because fast-import tracks sizes through `unsigned long`
everywhere it crosses subsystem boundaries; leaving that signature alone
keeps the scope of this commit somewhat reasonable.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/fast-import.c |  7 ++++++-
 pack-check.c          |  5 ++---
 packfile.c            | 28 +++++++++++++++++-----------
 packfile.h            |  3 ++-
 4 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 82bc6dcc00..3dff898c43 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1239,6 +1239,8 @@ static void *gfi_unpack_entry(
 	unsigned long *sizep)
 {
 	enum object_type type;
+	size_t size_st = 0;
+	void *data;
 	struct packed_git *p = all_packs[oe->pack_id];
 	if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) {
 		/* The object is stored in the packfile we are writing to
@@ -1260,7 +1262,10 @@ static void *gfi_unpack_entry(
 		 */
 		p->pack_size = pack_size + the_hash_algo->rawsz;
 	}
-	return unpack_entry(the_repository, p, oe->idx.offset, &type, sizep);
+	data = unpack_entry(the_repository, p, oe->idx.offset, &type, &size_st);
+	if (sizep)
+		*sizep = cast_size_t_to_ulong(size_st);
+	return data;
 }
 
 static void load_tree(struct tree_entry *root)
diff --git a/pack-check.c b/pack-check.c
index 2792f34d25..5adfb3f272 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -143,9 +143,8 @@ static int verify_packfile(struct repository *r,
 			data = NULL;
 			data_valid = 0;
 		} else {
-			unsigned long sz;
-			data = unpack_entry(r, p, entries[i].offset, &type, &sz);
-			size = sz;
+			data = unpack_entry(r, p, entries[i].offset, &type,
+					    &size);
 			data_valid = 1;
 		}
 
diff --git a/packfile.c b/packfile.c
index e202f48837..dab0a9b16d 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1454,7 +1454,7 @@ struct delta_base_cache_entry {
 	struct delta_base_cache_key key;
 	struct list_head lru;
 	void *data;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 };
 
@@ -1525,7 +1525,7 @@ static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
 }
 
 static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
-				   off_t base_offset, unsigned long *base_size,
+				   off_t base_offset, size_t *base_size,
 				   enum object_type *type)
 {
 	struct delta_base_cache_entry *ent;
@@ -1558,8 +1558,8 @@ void clear_delta_base_cache(void)
 }
 
 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
-				 void *base, unsigned long base_size,
-				 unsigned long delta_base_cache_limit,
+				 void *base, size_t base_size,
+				 size_t delta_base_cache_limit,
 				 enum object_type type)
 {
 	struct delta_base_cache_entry *ent;
@@ -1614,10 +1614,13 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 	 * a "real" type later if the caller is interested.
 	 */
 	if (oi->contentp) {
-		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep,
-						      &type);
+		size_t size_st = 0;
+		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
+						      &size_st, &type);
 		if (!*oi->contentp)
 			type = OBJ_BAD;
+		else if (oi->sizep)
+			*oi->sizep = cast_size_t_to_ulong(size_st);
 	} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
 		type = unpack_object_header(p, &w_curs, &curpos, &size);
 	}
@@ -1735,7 +1738,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset,
 static void *unpack_compressed_entry(struct packed_git *p,
 				    struct pack_window **w_curs,
 				    off_t curpos,
-				    unsigned long size)
+				    size_t size)
 {
 	int st;
 	git_zstream stream;
@@ -1790,11 +1793,11 @@ int do_check_packed_object_crc;
 struct unpack_entry_stack_ent {
 	off_t obj_offset;
 	off_t curpos;
-	unsigned long size;
+	size_t size;
 };
 
 void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
-		   enum object_type *final_type, unsigned long *final_size)
+		   enum object_type *final_type, size_t *final_size)
 {
 	struct pack_window *w_curs = NULL;
 	off_t curpos = obj_offset;
@@ -1911,7 +1914,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 		void *delta_data;
 		void *base = data;
 		void *external_base = NULL;
-		unsigned long delta_size, base_size = size;
+		size_t delta_size, base_size = size;
 		int i;
 		off_t base_obj_offset = obj_offset;
 
@@ -1928,6 +1931,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			struct object_id base_oid;
 			if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
 				struct object_info oi = OBJECT_INFO_INIT;
+				unsigned long bsz_ul = 0;
 
 				nth_packed_object_id(&base_oid, p,
 						     pack_pos_to_index(p, pos));
@@ -1938,11 +1942,13 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 				mark_bad_packed_object(p, &base_oid);
 
 				oi.typep = &type;
-				oi.sizep = &base_size;
+				oi.sizep = &bsz_ul;
 				oi.contentp = &base;
 				if (odb_read_object_info_extended(r->objects, &base_oid,
 								  &oi, 0) < 0)
 					base = NULL;
+				else
+					base_size = bsz_ul;
 
 				external_base = base;
 			}
diff --git a/packfile.h b/packfile.h
index 49d6bdecf6..0b5ae3f9fc 100644
--- a/packfile.h
+++ b/packfile.h
@@ -455,7 +455,8 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
 off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *);
 
 int is_pack_valid(struct packed_git *);
-void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
+void *unpack_entry(struct repository *r, struct packed_git *, off_t,
+		   enum object_type *, size_t *);
 unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
 unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

`write_reuse_object()` learned to track its packed-object size as
`size_t` in 606c192380 (odb, packfile: use size_t for streaming
object sizes, 2026-05-08), but the comparison sink it feeds,
`check_pack_inflate()`, still takes the expected decompressed size
as `unsigned long`. The call site bridges the mismatch with
`cast_size_t_to_ulong()`, which on Windows turns a >4 GiB object
into an immediate die().

That function only uses `expect` once: as the right-hand side of a
`stream.total_out == expect` equality test against zlib's counter.
zlib's own `total_out` counter is `uLong` and is therefore still
32-bit-bound on Windows. Widening `expect` to `size_t` cannot fix that,
but it is a strict improvement nonetheless: instead of dying outright,
an oversized object now simply makes the equality fail and lets
`write_reuse_object()` fall back to `write_no_reuse_object()`, which
decompresses and re-deflates the content (and which the larger
pack-objects widening series targets separately).

Drop the `cast_size_t_to_ulong()` shim at the call site now that
the receiving parameter speaks the same type as `entry_size`.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/pack-objects.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index fe9fbecb30..975f04d699 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -453,7 +453,7 @@ static int check_pack_inflate(struct packed_git *p,
 		struct pack_window **w_curs,
 		off_t offset,
 		off_t len,
-		unsigned long expect)
+		size_t expect)
 {
 	git_zstream stream;
 	unsigned char fakebuf[4096], *in;
@@ -671,8 +671,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 	datalen -= entry->in_pack_header_size;

 	if (!pack_to_stdout && p->index_version == 1 &&
-	    check_pack_inflate(p, &w_curs, offset, datalen,
-			       cast_size_t_to_ulong(entry_size))) {
+	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
 		error(_("corrupt packed object for %s"),
 		      oid_to_hex(&entry->idx.oid));
 		unuse_pack(&w_curs);
-- 
gitgitgadget

^ permalink raw reply related

* [PATCH 2/7] patch-delta: use size_t for sizes
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

`patch_delta()` takes the source and delta sizes by value and writes
back the reconstructed target size through an `unsigned long *`.  That
data type cannot represent a value that exceeds 4 GiB on systems where
`unsigned long` is 32-bit (notably 64-bit Windows builds), even though
the delta encoding itself, the on-disk layout, and the in-memory
buffers happily carry such sizes. A `size_t` companion to
`get_delta_hdr_size()`, `get_delta_hdr_size_sz()`, was introduced in
17fa077596 (delta, packfile: use size_t for delta header sizes,
2026-05-08) precisely so that `patch_delta()` could be widened without
changing the on-the-wire decoding helper's signature.

Widen `patch_delta()`'s three size parameters to `size_t` and switch
its internal use of `get_delta_hdr_size()` to the `_sz` variant.
Then propagate the wider type through the callers.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 apply.c                  |  2 +-
 builtin/index-pack.c     |  4 ++--
 builtin/unpack-objects.c |  2 +-
 delta.h                  |  6 +++---
 packfile.c               |  4 +---
 patch-delta.c            | 12 ++++++------
 t/helper/test-delta.c    | 10 ++++++----
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/apply.c b/apply.c
index 249248d4f2..3cf544e9a9 100644
--- a/apply.c
+++ b/apply.c
@@ -3232,7 +3232,7 @@ static int apply_binary_fragment(struct apply_state *state,
 				 struct patch *patch)
 {
 	struct fragment *fragment = patch->fragments;
-	unsigned long len;
+	size_t len;
 	void *dst;
 
 	if (!fragment)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index cf0bd8280d..3c4474e681 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -71,7 +71,7 @@ struct base_data {
 	/* Not initialized by make_base(). */
 	struct list_head list;
 	void *data;
-	unsigned long size;
+	size_t size;
 };
 
 /*
@@ -1048,7 +1048,7 @@ static struct base_data *resolve_delta(struct object_entry *delta_obj,
 {
 	void *delta_data, *result_data;
 	struct base_data *result;
-	unsigned long result_size;
+	size_t result_size;
 
 	if (show_stat) {
 		int i = delta_obj - objects;
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 59e9b8711e..e7a50c493c 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -314,7 +314,7 @@ static void resolve_delta(unsigned nr, enum object_type type,
 			  void *delta, unsigned long delta_size)
 {
 	void *result;
-	unsigned long result_size;
+	size_t result_size;
 
 	result = patch_delta(base, base_size,
 			     delta, delta_size,
diff --git a/delta.h b/delta.h
index fad68cfc45..bb149dc82b 100644
--- a/delta.h
+++ b/delta.h
@@ -75,9 +75,9 @@ diff_delta(const void *src_buf, unsigned long src_bufsize,
  * *trg_bufsize is updated with its size.  On failure a NULL pointer is
  * returned.  The returned buffer must be freed by the caller.
  */
-void *patch_delta(const void *src_buf, unsigned long src_size,
-		  const void *delta_buf, unsigned long delta_size,
-		  unsigned long *dst_size);
+void *patch_delta(const void *src_buf, size_t src_size,
+		  const void *delta_buf, size_t delta_size,
+		  size_t *dst_size);
 
 /* the smallest possible delta size is 4 bytes */
 #define DELTA_SIZE_MIN	4
diff --git a/packfile.c b/packfile.c
index 89366abfe3..e202f48837 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1964,10 +1964,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			      (uintmax_t)curpos, p->pack_name);
 			data = NULL;
 		} else {
-			unsigned long sz;
 			data = patch_delta(base, base_size, delta_data,
-					   delta_size, &sz);
-			size = sz;
+					   delta_size, &size);
 
 			/*
 			 * We could not apply the delta; warn the user, but
diff --git a/patch-delta.c b/patch-delta.c
index b5c8594db6..44cda97994 100644
--- a/patch-delta.c
+++ b/patch-delta.c
@@ -12,13 +12,13 @@
 #include "git-compat-util.h"
 #include "delta.h"
 
-void *patch_delta(const void *src_buf, unsigned long src_size,
-		  const void *delta_buf, unsigned long delta_size,
-		  unsigned long *dst_size)
+void *patch_delta(const void *src_buf, size_t src_size,
+		  const void *delta_buf, size_t delta_size,
+		  size_t *dst_size)
 {
 	const unsigned char *data, *top;
 	unsigned char *dst_buf, *out, cmd;
-	unsigned long size;
+	size_t size;
 
 	if (delta_size < DELTA_SIZE_MIN)
 		return NULL;
@@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, unsigned long src_size,
 	top = (const unsigned char *) delta_buf + delta_size;
 
 	/* make sure the orig file size matches what we expect */
-	size = get_delta_hdr_size(&data, top);
+	size = get_delta_hdr_size_sz(&data, top);
 	if (size != src_size)
 		return NULL;
 
 	/* now the result size */
-	size = get_delta_hdr_size(&data, top);
+	size = get_delta_hdr_size_sz(&data, top);
 	dst_buf = xmallocz(size);
 
 	out = dst_buf;
diff --git a/t/helper/test-delta.c b/t/helper/test-delta.c
index 52ea00c937..8223a60229 100644
--- a/t/helper/test-delta.c
+++ b/t/helper/test-delta.c
@@ -21,7 +21,7 @@ int cmd__delta(int argc, const char **argv)
 	int fd;
 	struct strbuf from = STRBUF_INIT, data = STRBUF_INIT;
 	char *out_buf;
-	unsigned long out_size;
+	size_t out_size;
 
 	if (argc != 5 || (strcmp(argv[1], "-d") && strcmp(argv[1], "-p")))
 		usage(usage_str);
@@ -31,11 +31,13 @@ int cmd__delta(int argc, const char **argv)
 	if (strbuf_read_file(&data, argv[3], 0) < 0)
 		die_errno("unable to read '%s'", argv[3]);
 
-	if (argv[1][1] == 'd')
+	if (argv[1][1] == 'd') {
+		unsigned long delta_size;
 		out_buf = diff_delta(from.buf, from.len,
 				     data.buf, data.len,
-				     &out_size, 0);
-	else
+				     &delta_size, 0);
+		out_size = delta_size;
+	} else
 		out_buf = patch_delta(from.buf, from.len,
 				      data.buf, data.len,
 				      &out_size);
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

On Windows, `unsigned long` and `long` are 32 bits even on 64-bit
builds. The MSVC compatibility header has shimmed `ftruncate()` with

	#define ftruncate _chsize

ever since `compat/msvc-posix.h` was introduced. `_chsize()` takes a
32-bit `long` for the new length, which silently truncates files (and
the requested size) to 2 GiB. That is enough to make t7508 test 126
"git add fails gracefully with 4 GiB and 8 GiB files" fail under
MSVC: `test-tool truncate` creates a sparse 4 GiB or 8 GiB file via
the shimmed `ftruncate()`, and the test never gets off the ground.

`_chsize_s()` is the modern replacement, accepts a 64-bit `__int64`
length, and is the only sensible target on Windows. The catch is that
it does not follow the POSIX `-1` + `errno` convention: it returns
`0` on success and an errno value (a small positive integer) on
failure. A plain `#define ftruncate _chsize_s` would therefore
silently break callers that test the return value as `< 0` or against
`-1`, of which there are several: `http.c`, `parallel-checkout.c`,
and `t/helper/test-truncate.c` among them.

Introduce a `static inline` wrapper that calls `_chsize_s()`, copies
its errno return into `errno`, and translates the result to the
familiar `-1` / `0` convention, then point `ftruncate` at the
wrapper. Place the wrapper after `#include "mingw-posix.h"` so the
`off_t` parameter resolves to the already-widened `off64_t` rather
than the 32-bit `_off_t` from `compat/vcbuild/include/unistd.h`.

MinGW is unaffected: its `ftruncate()` already takes `off_t` and
routes through `ftruncate64()` when `_FILE_OFFSET_BITS=64`, which is
the default in our build.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 compat/msvc-posix.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/compat/msvc-posix.h b/compat/msvc-posix.h
index c500b8b4aa..7ce39b8d3f 100644
--- a/compat/msvc-posix.h
+++ b/compat/msvc-posix.h
@@ -16,7 +16,6 @@
 #define __attribute__(x)
 #define strcasecmp   _stricmp
 #define strncasecmp  _strnicmp
-#define ftruncate    _chsize
 #define strtoull     _strtoui64
 #define strtoll      _strtoi64

@@ -30,4 +29,27 @@ typedef int sigset_t;

 #include "mingw-posix.h"

+/*
+ * MSVC's `_chsize()` takes a 32-bit `long` and silently truncates files
+ * to 2 GiB. `_chsize_s()` accepts a 64-bit length but returns 0 on
+ * success or an errno value on failure, rather than the -1/errno
+ * convention POSIX `ftruncate()` callers expect. Wrap it so callers
+ * that test the return value as `< 0` or against `-1` keep working.
+ *
+ * Note: this declaration must follow `#include "mingw-posix.h"` so
+ * `off_t` resolves to `off64_t` and the parameter type matches the
+ * underlying `_chsize_s()` width.
+ */
+static inline int msvc_ftruncate(int fd, off_t length)
+{
+	int err = _chsize_s(fd, length);
+
+	if (err) {
+		errno = err;
+		return -1;
+	}
+	return 0;
+}
+#define ftruncate msvc_ftruncate
+
 #endif /* COMPAT_MSVC_POSIX_H */
-- 
gitgitgadget

^ permalink raw reply related

* [PATCH 0/7] More work supporting objects larger than 4GB on Windows
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin

This patch series tries to address the problems pointed out by the expensive
tests that now run in CI: t5608 and t7508 verify various aspects about
objects larger than 4GB, which Git does not currently handle correctly when
run on a platform where size_t is 64-bit and unsigned long is 32-bit.

Unfortunately, this conflicts heavily with ps/odb-source-loose. I rebased
the branch onto seen and pushed the result to
https://github.com/dscho/git/tree/refs/heads/objects-larger-than-4gb-on-windows-pt2-seen,
to make it easier to resolve merge conflicts. Here is the relevant
range-diff:

1:  f3aeae983a ! 1:  62adeb9818 odb: use size_t for object_info.sizep and the size APIs
    @@ builtin/log.c: static int show_blob_object(const struct object_id *oid, struct r
     
      ## builtin/ls-files.c ##
     @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struct strbuf *line,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struc
     
      ## builtin/ls-tree.c ##
     @@ builtin/ls-tree.c: static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ notes.c: static void format_note(struct notes_tree *t, const struct object_id *o
      	if (!t)
     
      ## object-file.c ##
    -@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info *oi)
    +@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
      	}
      
      	if (oi->sizep)
    @@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info
      
      	/*
      	 * The length must be followed by a zero byte
    -@@ object-file.c: static int read_object_info_from_path(struct odb_source *source,
    - 	void *map = NULL;
    - 	git_zstream stream, *stream_to_end = NULL;
    - 	char hdr[MAX_HEADER_LEN];
    --	unsigned long size_scratch;
    -+	size_t size_scratch;
    - 	enum object_type type_scratch;
    - 	struct stat st;
    - 
     @@ object-file.c: int force_object_loose(struct odb_source *source,
    - {
    + 	struct odb_source_files *files = odb_source_files_downcast(source);
      	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
      	void *buf;
     -	unsigned long len;
    @@ object-file.c: int read_loose_object(struct repository *repo,
      
      	fd = git_open(path);
      	if (fd >= 0)
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 	struct object_info oi = OBJECT_INFO_INIT;
    - 	struct odb_loose_read_stream *st;
    - 	unsigned long mapsize;
    --	unsigned long size_ul;
    - 	void *mapped;
    - 
    - 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 		goto error;
    - 	}
    - 
    --	/*
    --	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    --	 * st->base.size is size_t (64-bit). Use temporary variable.
    --	 * Note: loose objects >4GB would still truncate here, but such
    --	 * large loose objects are uncommon (they'd normally be packed).
    --	 */
    --	oi.sizep = &size_ul;
    -+	oi.sizep = &st->base.size;
    - 	oi.typep = &st->base.type;
    - 
    - 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    - 		goto error;
    --	st->base.size = size_ul;
    - 
    - 	st->mapped = mapped;
    - 	st->mapsize = mapsize;
     
      ## object.c ##
     @@ object.c: struct object *parse_object_with_flags(struct repository *r,
    @@ odb.h: int odb_read_object_info_extended(struct object_database *odb,
      enum odb_has_object_flags {
      	/* Retry packed storage after checking packed and loose storage */
     
    + ## odb/source-loose.c ##
    +@@ odb/source-loose.c: static int read_object_info_from_path(struct odb_source_loose *loose,
    + 	void *map = NULL;
    + 	git_zstream stream, *stream_to_end = NULL;
    + 	char hdr[MAX_HEADER_LEN];
    +-	unsigned long size_scratch;
    ++	size_t size_scratch;
    + 	enum object_type type_scratch;
    + 	struct stat st;
    + 
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 	struct object_info oi = OBJECT_INFO_INIT;
    + 	struct odb_loose_read_stream *st;
    + 	unsigned long mapsize;
    +-	unsigned long size_ul;
    + 	void *mapped;
    + 
    + 	mapped = odb_source_loose_map_object(loose, oid, &mapsize);
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 		goto error;
    + 	}
    + 
    +-	/*
    +-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    +-	 * st->base.size is size_t (64-bit). Use temporary variable.
    +-	 * Note: loose objects >4GB would still truncate here, but such
    +-	 * large loose objects are uncommon (they'd normally be packed).
    +-	 */
    +-	oi.sizep = &size_ul;
    ++	oi.sizep = &st->base.size;
    + 	oi.typep = &st->base.type;
    + 
    + 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    + 		goto error;
    +-	st->base.size = size_ul;
    + 
    + 	st->mapped = mapped;
    + 	st->mapsize = mapsize;
    +
      ## odb/streaming.c ##
     @@ odb/streaming.c: static int open_istream_incore(struct odb_read_stream **out,
      		.base.read = read_istream_incore,


Johannes Schindelin (7):
  compat/msvc: use _chsize_s for ftruncate
  patch-delta: use size_t for sizes
  pack-objects(check_pack_inflate()): use size_t instead of unsigned
    long
  packfile: widen unpack_entry()'s size out-parameter to size_t
  pack-objects: use size_t for in-core object sizes
  packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
  odb: use size_t for object_info.sizep and the size APIs

 apply.c                       |  8 ++--
 archive.c                     |  4 +-
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 +++++--
 builtin/cat-file.c            | 39 ++++++++++++-------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 +++-
 builtin/fast-import.c         | 29 ++++++++++----
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++---
 builtin/index-pack.c          | 10 ++---
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 +-
 builtin/merge-tree.c          |  6 +--
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 +--
 builtin/pack-objects.c        | 73 +++++++++++++++++++++--------------
 builtin/repo.c                |  4 +-
 builtin/tag.c                 |  4 +-
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  8 ++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 +-
 commit.c                      | 10 ++---
 compat/msvc-posix.h           | 24 +++++++++++-
 config.c                      |  2 +-
 delta.h                       | 20 +++-------
 diff.c                        |  5 ++-
 dir.c                         |  2 +-
 entry.c                       |  4 +-
 fmt-merge-msg.c               |  4 +-
 fsck.c                        |  2 +-
 grep.c                        |  4 +-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 +-
 merge-blobs.c                 |  6 +--
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 ++--
 object-file.c                 | 18 +++------
 object.c                      |  2 +-
 odb.c                         | 12 +++---
 odb.h                         | 10 ++---
 odb/streaming.c               | 13 +------
 pack-bitmap.c                 |  4 +-
 pack-check.c                  |  5 +--
 pack-objects.h                |  2 +-
 packfile.c                    | 54 ++++++++++----------------
 packfile.h                    |  5 ++-
 patch-delta.c                 |  8 ++--
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 ++-
 read-cache.c                  |  6 +--
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-delta.c         | 10 +++--
 t/helper/test-pack-deltas.c   |  3 +-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 +-
 tree-walk.c                   | 10 +++--
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 71 files changed, 296 insertions(+), 253 deletions(-)


base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2137%2Fdscho%2Fobjects-larger-than-4gb-on-windows-pt2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2137/dscho/objects-larger-than-4gb-on-windows-pt2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2137
-- 
gitgitgadget

^ permalink raw reply

* Re: [PATCH] transport-helper: fix TSAN race in transfer_debug()
From: Pushkar Singh @ 2026-06-04 10:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, peff
In-Reply-To: <xmqqv7bzp0vc.fsf@gitster.g>

Hi Junio,

On Thu, Jun 4, 2026 at 6:39 AM Junio C Hamano <gitster@pobox.com> wrote:

> Would it be possible that transfer_debug_enabled is still -1 at this
> point?  We would proceed in such a case, which is a bit different from
> what would have happened in the original.
>
> Perhaps
>
>         if (transfer_debug_enabled <= 0)
>                 return;
>
> is what you want?  I dunno.

You're right. The original code would never proceed while the value was still
negative, whereas my change would.

I'll update it to use <= 0 and send a v2.

Thanks,
Pushkar

^ permalink raw reply

* [PATCH v3 8/8] t: let prove fail when parsing invalid TAP output
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

To make the result of our tests accessible we use the TAP protocol. This
protocol is parsed by either prove or by Meson. Unfortunately, these two
tools differ when it comes to their strictness when parsing the
protocol:

  - Prove by default happily accepts lines not specified by the
    protocol.

  - Meson will also accept such lines, but prints a big and ugly warning
    message.

We have fixed our test suite in the past to not print invalid TAP lines
anymore via b1dc2e796e (Merge branch 'ps/meson-tap-parse', 2025-06-17).
But as none of our tools perform a strict check it's still possible for
broken tests to sneak back in, like for example in 362f69547f (Merge
branch 'ps/t1006-tap-fix', 2025-07-16). This doesn't hurt at all when
using prove, but it's quite annoying when using Meson due to the
generated warnings.

Unfortunately, there doesn't seem to be a portable way to make all tools
complain about violations of the TAP format. The TAP 14 specification
has added pragmas to the protocol that would allow us to say `pragma
+strict`, and the effect of that would be to treat invalid TAP lines as
a test failure. But the release of TAP 14 is still rather recent, and
Test-Harness for example only gained support for it in version 3.48,
which was released in 2023.

In fact though, this pragma was already introduced as an unofficial
extension of the TAP protocol with Test-Harness 3.10, released in 2008.
So while not all tools understand the pragma, at least prove has for a
long time.

Unconditionally enable the pragma when using prove so that we'll detect
tests that emit broken TAP output right away. This would have detected
the issues fixed in preceding commits:

    $ prove t7527-builtin-fsmonitor.sh
    t7527-builtin-fsmonitor.sh .. All 69 subtests passed
            (less 6 skipped subtests: 63 okay)

    Test Summary Report
    -------------------
    t7527-builtin-fsmonitor.sh (Wstat: 0 Tests: 69 Failed: 0)
      Parse errors: Unknown TAP token: "Initialized empty Git repository in /tmp/git/test_fsmonitor_smoke/.git/"

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 t/test-lib.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index d1d24c4124..ceefb99bff 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1532,6 +1532,12 @@ then
 	BAIL_OUT 'You need to build test-tool; Run "make t/helper/test-tool" in the source (toplevel) directory'
 fi

+if test -n "$HARNESS_ACTIVE"
+then
+	say "TAP version 13"
+	say "pragma +strict"
+fi
+
 # Are we running this test at all?
 remove_trash=
 this_test=${0##*/}

-- 
2.54.0.1064.gd145956f57.dirty

^ permalink raw reply related

* [PATCH v3 7/8] t/lib-git-p4: silence output when killing p4d and its watchdog
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

When stopping the p4d watchdog process via "kill -9", the shell may
print a job-control notification like:

  ./test-lib.sh: line 1269: 57960 Killed: 9               while true; do
      if test $nr_tries_left -eq 0; then
          kill -9 $p4d_pid; exit 1;
      fi; sleep 1; nr_tries_left=$(($nr_tries_left - 1));
  done 2> /dev/null 4>&2  (wd: ~)

This message is printed asynchronously by the shell when it reaps the
process. While harmless right now, this will cause breakage once we
enable strict parsing of the TAP protocol in a subsequent commit.

Fix this by using `wait` so that we can synchronously reap the watchdog
process and swallow the diagnostic.

While at it, deduplicate the logic we have in `stop_p4d_and_watchdog ()`
and `stop_and_cleanup_p4d ()`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 t/lib-git-p4.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/lib-git-p4.sh b/t/lib-git-p4.sh
index d22e9c684a..9108868187 100644
--- a/t/lib-git-p4.sh
+++ b/t/lib-git-p4.sh
@@ -65,6 +65,7 @@ pidfile="$TRASH_DIRECTORY/p4d.pid"
 
 stop_p4d_and_watchdog () {
 	kill -9 $p4d_pid $watchdog_pid
+	wait $p4d_pid $watchdog_pid 2>/dev/null
 }
 
 # git p4 submit generates a temp file, which will
@@ -174,8 +175,7 @@ retry_until_success () {
 }
 
 stop_and_cleanup_p4d () {
-	kill -9 $p4d_pid $watchdog_pid
-	wait $p4d_pid
+	stop_p4d_and_watchdog
 	rm -rf "$db" "$cli" "$pidfile"
 }
 

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH v3 6/8] t/test-lib: silence EBUSY errors on Windows during test cleanup
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

When tests have finished we clean up the trash directory via `rm -rf`.
On Windows this can fail with EBUSY in cases where a process still holds
some of the files open, for example when we have spawned a daemonized
process that wasn't properly terminated. We thus retry several times,
but every failure will result in error messages being printed, and that
in turn breaks the TAP output format.

One such case where this is causing issues is in t921x, which contains
tests related to Scalar. Some tests spawn the fsmonitor daemon, and we
never properly terminate it.

The obvious fix would be to ensure that we never leak any processes, but
that gets ugly fast. Instead, let's work around the issue by silencing
error messages printed by the `rm -rf` calls. We already know to print
an error when the retry loop fails, so we don't lose much.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 t/test-lib.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 4a7357b547..d1d24c4124 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1299,10 +1299,10 @@ test_done () {
 			error "Tests passed but trash directory already removed before test cleanup; aborting"

 			cd "$TRASH_DIRECTORY/.." &&
-			rm -fr "$TRASH_DIRECTORY" || {
+			rm -fr "$TRASH_DIRECTORY" 2>/dev/null || {
 				# try again in a bit
 				sleep 5;
-				rm -fr "$TRASH_DIRECTORY"
+				rm -fr "$TRASH_DIRECTORY" 2>/dev/null
 			} ||
 			error "Tests passed but test cleanup failed; aborting"
 		fi

-- 
2.54.0.1064.gd145956f57.dirty

^ permalink raw reply related

* [PATCH v3 5/8] t7810: turn MB_REGEX check into a lazy prereq
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

In t7810 we verify whether the system has proper multibyte locale
support by executing `test-tool regex` with a unicode character. When
this check fails though we'll output an error that breaks the TAP
format.

Fix this issue by turning the logic into a lazy prerequisite.

Reported-by: Jeff King <peff@peff.net>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 t/t7810-grep.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh
index 1b195bee59..d61c4a4d73 100755
--- a/t/t7810-grep.sh
+++ b/t/t7810-grep.sh
@@ -18,8 +18,9 @@ test_invalid_grep_expression() {
 	'
 }
 
-LC_ALL=en_US.UTF-8 test-tool regex '^.$' '¿' &&
-  test_set_prereq MB_REGEX
+test_lazy_prereq MB_REGEX '
+	LC_ALL=en_US.UTF-8 test-tool regex "^.$" "¿"
+'
 
 cat >hello.c <<EOF
 #include <assert.h>

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH v3 4/8] t7527: fix broken TAP output
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

Before running the tests in t7527 we first verify whether the fsmonitor
even works, which seems to depend on the actual filesystem that is in
use. The verification executes outside of any prerequisite or test body,
so its stdout/stderr is not being redirected.

The consequence of this is that any command that prints to stdout/stderr
may break the TAP specification by printing invalid lines. And in fact
we already do that, as git-init(1) prints the path to the created Git
repository by default.

Fix this issue by moving the logic into a lazy prerequisite.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 t/t7527-builtin-fsmonitor.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh
index b63c162f9b..d881e27466 100755
--- a/t/t7527-builtin-fsmonitor.sh
+++ b/t/t7527-builtin-fsmonitor.sh
@@ -25,7 +25,8 @@ maybe_timeout () {
 		"$@"
 	fi
 }
-verify_fsmonitor_works () {
+
+test_lazy_prereq FSMONITOR_WORKS '
 	git init test_fsmonitor_smoke || return 1

 	GIT_TRACE_FSMONITOR="$PWD/smoke.trace" &&
@@ -50,9 +51,9 @@ verify_fsmonitor_works () {
 	ret=$?
 	rm -rf test_fsmonitor_smoke smoke.trace
 	return $ret
-}
+'

-if ! verify_fsmonitor_works
+if ! test_have_prereq FSMONITOR_WORKS
 then
 	skip_all="filesystem does not deliver fsmonitor events (container/overlayfs?)"
 	test_done

-- 
2.54.0.1064.gd145956f57.dirty

^ permalink raw reply related

* [PATCH v3 3/8] ci: unify Linux images across GitLab and GitHub
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

The image for the "linux-breaking-changes" job has drifted apart across
GitHub and GitLab. Adapt it to use "ubuntu:rolling" on both systems.

With this change there's only one difference remaining: GitHub uses
"ubuntu:focal" for the "linux32" job while GitLab uses "ubuntu:20.04".
These are different names for the same image, so there is no actual
difference here. Adjust GitHub to use the "20.04" tag -- this matches
all the other jobs which use version numbers, and you don't have to
learn Ubuntu's release names by heart.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 .github/workflows/main.yml | 2 +-
 .gitlab-ci.yml             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3da5326f0b..cf341d74db 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -407,7 +407,7 @@ jobs:
           image: alpine:latest
         # Supported until 2025-04-02.
         - jobname: linux32
-          image: i386/ubuntu:focal
+          image: i386/ubuntu:20.04
         # A RHEL 8 compatible distro.  Supported until 2029-05-31.
         - jobname: almalinux-8
           image: almalinux:8
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a5bdec5159..49f3689b6a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -47,7 +47,7 @@ test:linux:
         CC: gcc
         CC_PACKAGE: gcc-8
       - jobname: linux-breaking-changes
-        image: ubuntu:20.04
+        image: ubuntu:rolling
         CC: gcc
       - jobname: fedora-breaking-changes-meson
         image: fedora:latest

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH v3 2/8] gitlab-ci: add missing Linux jobs
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

The GitLab CI definitions are missing jobs for AlmaLinux and Debian,
both of which exist in GitHub Workflows. Plug this gap.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 .gitlab-ci.yml | 6 ++++++
 ci/lib.sh      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8cb41baa14..a5bdec5159 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -68,6 +68,12 @@ test:linux:
         # Supported until 2025-04-02.
       - jobname: linux32
         image: i386/ubuntu:20.04
+      # A RHEL 8 compatible distro.  Supported until 2029-05-31.
+      - jobname: almalinux-8
+        image: almalinux:8
+      # Supported until 2026-08-31.
+      - jobname: debian-11
+        image: debian:11
   artifacts:
     paths:
       - t/failed-test-artifacts
diff --git a/ci/lib.sh b/ci/lib.sh
index 6e3799cfc3..b939110a6e 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -254,7 +254,7 @@ then
 		CI_OS_NAME=osx
 		JOBS=$(nproc)
 		;;
-	*,alpine:*|*,fedora:*|*,ubuntu:*|*,i386/ubuntu:*)
+	*,almalinux:*|*,alpine:*|*,debian:*|*,fedora:*|*,ubuntu:*|*,i386/ubuntu:*)
 		CI_OS_NAME=linux
 		JOBS=$(nproc)
 		;;

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH v3 1/8] gitlab-ci: rearrange Linux jobs to match GitHub's order
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260604-pks-t7527-fix-tap-output-v3-0-7d766ed481e4@pks.im>

Rearrange the order of Linux jobs that we have defined in GitLab CI so
that it matches the order on GitHub's side. This makes it easier to
compare whether the list of jobs actually matches on both sides.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 .gitlab-ci.yml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e0b9a0d82b..8cb41baa14 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -42,15 +42,15 @@ test:linux:
       - jobname: linux-reftable
         image: ubuntu:rolling
         CC: clang
+      - jobname: linux-TEST-vars
+        image: ubuntu:20.04
+        CC: gcc
+        CC_PACKAGE: gcc-8
       - jobname: linux-breaking-changes
         image: ubuntu:20.04
         CC: gcc
       - jobname: fedora-breaking-changes-meson
         image: fedora:latest
-      - jobname: linux-TEST-vars
-        image: ubuntu:20.04
-        CC: gcc
-        CC_PACKAGE: gcc-8
       - jobname: linux-leaks
         image: ubuntu:rolling
         CC: gcc
@@ -60,13 +60,14 @@ test:linux:
       - jobname: linux-asan-ubsan
         image: ubuntu:rolling
         CC: clang
+      - jobname: linux-meson
+        image: ubuntu:rolling
+        CC: gcc
       - jobname: linux-musl-meson
         image: alpine:latest
+        # Supported until 2025-04-02.
       - jobname: linux32
         image: i386/ubuntu:20.04
-      - jobname: linux-meson
-        image: ubuntu:rolling
-        CC: gcc
   artifacts:
     paths:
       - t/failed-test-artifacts

-- 
2.54.0.1064.gd145956f57.dirty


^ permalink raw reply related

* [PATCH v3 0/8] t: fix broken TAP output
From: Patrick Steinhardt @ 2026-06-04 10:07 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King
In-Reply-To: <20260602-pks-t7527-fix-tap-output-v1-0-db3da2a1b137@pks.im>

Hi,

this small patch series fixes another instance of broken TAP output that
has landed via 4d11b9c218 (Merge branch 'pt/fsmonitor-linux', 2026-05-31).

As this has happened multiple times by now, I decided to have a look at
whether we can fix this class of issues a bit more holistically. So this
series also contains a change that makes prove bail out when it sees
invalid TAP output, which uncovers a small set of preexisting issues in
our test suite.

Changes in v3:
  - Fix a test gap for AlmaLinux and Debian in GitLab CI, which uncovers
    an issue flagged by Peff.
  - Fix TAP breakage in t7810.
  - Link to v2: https://patch.msgid.link/20260603-pks-t7527-fix-tap-output-v2-0-cf3af5694e20@pks.im

Changes in v2:
  - Fix waiting for p4d, and deduplicate the logic that does this.
  - Link to v1: https://patch.msgid.link/20260602-pks-t7527-fix-tap-output-v1-0-db3da2a1b137@pks.im

Test runs can be found at [1] and [2]. Note that GitHub-side tests are
failing on Windows, but that is a preexisting failure on "master".

Thanks!

Patrick

[1]: https://gitlab.com/gitlab-org/git/-/merge_requests/585
[2]: https://github.com/git/git/pull/2320

---
Patrick Steinhardt (8):
      gitlab-ci: rearrange Linux jobs to match GitHub's order
      gitlab-ci: add missing Linux jobs
      ci: unify Linux images across GitLab and GitHub
      t7527: fix broken TAP output
      t7810: turn MB_REGEX check into a lazy prereq
      t/test-lib: silence EBUSY errors on Windows during test cleanup
      t/lib-git-p4: silence output when killing p4d and its watchdog
      t: let prove fail when parsing invalid TAP output

 .github/workflows/main.yml   |  2 +-
 .gitlab-ci.yml               | 23 +++++++++++++++--------
 ci/lib.sh                    |  2 +-
 t/lib-git-p4.sh              |  4 ++--
 t/t7527-builtin-fsmonitor.sh |  7 ++++---
 t/t7810-grep.sh              |  5 +++--
 t/test-lib.sh                | 10 ++++++++--
 7 files changed, 34 insertions(+), 19 deletions(-)

Range-diff versus v2:

-:  ---------- > 1:  5e817b102f gitlab-ci: rearrange Linux jobs to match GitHub's order
-:  ---------- > 2:  83646cc834 gitlab-ci: add missing Linux jobs
-:  ---------- > 3:  cca1567fbf ci: unify Linux images across GitLab and GitHub
1:  52abbd5280 = 4:  430bc51818 t7527: fix broken TAP output
-:  ---------- > 5:  78ef22df8d t7810: turn MB_REGEX check into a lazy prereq
2:  ea1f1eb466 = 6:  7bbaeff48c t/test-lib: silence EBUSY errors on Windows during test cleanup
3:  e97a515470 = 7:  abf2be09e6 t/lib-git-p4: silence output when killing p4d and its watchdog
4:  436d7d8cf3 = 8:  04367c34be t: let prove fail when parsing invalid TAP output

---
base-commit: 1666c1265231b0bc5f613fbbf3f0a9896cdef76e
change-id: 20260601-pks-t7527-fix-tap-output-105da1d73df0


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox