[PATCH 0/7] More work supporting objects larger than 4GB on Windows

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/7] More work supporting objects larger than 4GB on Windows
@ 2026-06-04 10:51 Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin

This patch series tries to address the problems pointed out by the expensive
tests that now run in CI: t5608 and t7508 verify various aspects about
objects larger than 4GB, which Git does not currently handle correctly when
run on a platform where size_t is 64-bit and unsigned long is 32-bit.

Unfortunately, this conflicts heavily with ps/odb-source-loose. I rebased
the branch onto seen and pushed the result to
https://github.com/dscho/git/tree/refs/heads/objects-larger-than-4gb-on-windows-pt2-seen,
to make it easier to resolve merge conflicts. Here is the relevant
range-diff:

1:  f3aeae983a ! 1:  62adeb9818 odb: use size_t for object_info.sizep and the size APIs
    @@ builtin/log.c: static int show_blob_object(const struct object_id *oid, struct r
     
      ## builtin/ls-files.c ##
     @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struct strbuf *line,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struc
     
      ## builtin/ls-tree.c ##
     @@ builtin/ls-tree.c: static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ notes.c: static void format_note(struct notes_tree *t, const struct object_id *o
      	if (!t)
     
      ## object-file.c ##
    -@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info *oi)
    +@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
      	}
      
      	if (oi->sizep)
    @@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info
      
      	/*
      	 * The length must be followed by a zero byte
    -@@ object-file.c: static int read_object_info_from_path(struct odb_source *source,
    - 	void *map = NULL;
    - 	git_zstream stream, *stream_to_end = NULL;
    - 	char hdr[MAX_HEADER_LEN];
    --	unsigned long size_scratch;
    -+	size_t size_scratch;
    - 	enum object_type type_scratch;
    - 	struct stat st;
    - 
     @@ object-file.c: int force_object_loose(struct odb_source *source,
    - {
    + 	struct odb_source_files *files = odb_source_files_downcast(source);
      	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
      	void *buf;
     -	unsigned long len;
    @@ object-file.c: int read_loose_object(struct repository *repo,
      
      	fd = git_open(path);
      	if (fd >= 0)
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 	struct object_info oi = OBJECT_INFO_INIT;
    - 	struct odb_loose_read_stream *st;
    - 	unsigned long mapsize;
    --	unsigned long size_ul;
    - 	void *mapped;
    - 
    - 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 		goto error;
    - 	}
    - 
    --	/*
    --	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    --	 * st->base.size is size_t (64-bit). Use temporary variable.
    --	 * Note: loose objects >4GB would still truncate here, but such
    --	 * large loose objects are uncommon (they'd normally be packed).
    --	 */
    --	oi.sizep = &size_ul;
    -+	oi.sizep = &st->base.size;
    - 	oi.typep = &st->base.type;
    - 
    - 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    - 		goto error;
    --	st->base.size = size_ul;
    - 
    - 	st->mapped = mapped;
    - 	st->mapsize = mapsize;
     
      ## object.c ##
     @@ object.c: struct object *parse_object_with_flags(struct repository *r,
    @@ odb.h: int odb_read_object_info_extended(struct object_database *odb,
      enum odb_has_object_flags {
      	/* Retry packed storage after checking packed and loose storage */
     
    + ## odb/source-loose.c ##
    +@@ odb/source-loose.c: static int read_object_info_from_path(struct odb_source_loose *loose,
    + 	void *map = NULL;
    + 	git_zstream stream, *stream_to_end = NULL;
    + 	char hdr[MAX_HEADER_LEN];
    +-	unsigned long size_scratch;
    ++	size_t size_scratch;
    + 	enum object_type type_scratch;
    + 	struct stat st;
    + 
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 	struct object_info oi = OBJECT_INFO_INIT;
    + 	struct odb_loose_read_stream *st;
    + 	unsigned long mapsize;
    +-	unsigned long size_ul;
    + 	void *mapped;
    + 
    + 	mapped = odb_source_loose_map_object(loose, oid, &mapsize);
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 		goto error;
    + 	}
    + 
    +-	/*
    +-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    +-	 * st->base.size is size_t (64-bit). Use temporary variable.
    +-	 * Note: loose objects >4GB would still truncate here, but such
    +-	 * large loose objects are uncommon (they'd normally be packed).
    +-	 */
    +-	oi.sizep = &size_ul;
    ++	oi.sizep = &st->base.size;
    + 	oi.typep = &st->base.type;
    + 
    + 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    + 		goto error;
    +-	st->base.size = size_ul;
    + 
    + 	st->mapped = mapped;
    + 	st->mapsize = mapsize;
    +
      ## odb/streaming.c ##
     @@ odb/streaming.c: static int open_istream_incore(struct odb_read_stream **out,
      		.base.read = read_istream_incore,


Johannes Schindelin (7):
  compat/msvc: use _chsize_s for ftruncate
  patch-delta: use size_t for sizes
  pack-objects(check_pack_inflate()): use size_t instead of unsigned
    long
  packfile: widen unpack_entry()'s size out-parameter to size_t
  pack-objects: use size_t for in-core object sizes
  packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
  odb: use size_t for object_info.sizep and the size APIs

 apply.c                       |  8 ++--
 archive.c                     |  4 +-
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 +++++--
 builtin/cat-file.c            | 39 ++++++++++++-------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 +++-
 builtin/fast-import.c         | 29 ++++++++++----
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++---
 builtin/index-pack.c          | 10 ++---
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 +-
 builtin/merge-tree.c          |  6 +--
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 +--
 builtin/pack-objects.c        | 73 +++++++++++++++++++++--------------
 builtin/repo.c                |  4 +-
 builtin/tag.c                 |  4 +-
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  8 ++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 +-
 commit.c                      | 10 ++---
 compat/msvc-posix.h           | 24 +++++++++++-
 config.c                      |  2 +-
 delta.h                       | 20 +++-------
 diff.c                        |  5 ++-
 dir.c                         |  2 +-
 entry.c                       |  4 +-
 fmt-merge-msg.c               |  4 +-
 fsck.c                        |  2 +-
 grep.c                        |  4 +-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 +-
 merge-blobs.c                 |  6 +--
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 ++--
 object-file.c                 | 18 +++------
 object.c                      |  2 +-
 odb.c                         | 12 +++---
 odb.h                         | 10 ++---
 odb/streaming.c               | 13 +------
 pack-bitmap.c                 |  4 +-
 pack-check.c                  |  5 +--
 pack-objects.h                |  2 +-
 packfile.c                    | 54 ++++++++++----------------
 packfile.h                    |  5 ++-
 patch-delta.c                 |  8 ++--
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 ++-
 read-cache.c                  |  6 +--
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-delta.c         | 10 +++--
 t/helper/test-pack-deltas.c   |  3 +-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 +-
 tree-walk.c                   | 10 +++--
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 71 files changed, 296 insertions(+), 253 deletions(-)


base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2137%2Fdscho%2Fobjects-larger-than-4gb-on-windows-pt2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2137/dscho/objects-larger-than-4gb-on-windows-pt2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2137
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

On Windows, `unsigned long` and `long` are 32 bits even on 64-bit
builds. The MSVC compatibility header has shimmed `ftruncate()` with

	#define ftruncate _chsize

ever since `compat/msvc-posix.h` was introduced. `_chsize()` takes a
32-bit `long` for the new length, which silently truncates files (and
the requested size) to 2 GiB. That is enough to make t7508 test 126
"git add fails gracefully with 4 GiB and 8 GiB files" fail under
MSVC: `test-tool truncate` creates a sparse 4 GiB or 8 GiB file via
the shimmed `ftruncate()`, and the test never gets off the ground.

`_chsize_s()` is the modern replacement, accepts a 64-bit `__int64`
length, and is the only sensible target on Windows. The catch is that
it does not follow the POSIX `-1` + `errno` convention: it returns
`0` on success and an errno value (a small positive integer) on
failure. A plain `#define ftruncate _chsize_s` would therefore
silently break callers that test the return value as `< 0` or against
`-1`, of which there are several: `http.c`, `parallel-checkout.c`,
and `t/helper/test-truncate.c` among them.

Introduce a `static inline` wrapper that calls `_chsize_s()`, copies
its errno return into `errno`, and translates the result to the
familiar `-1` / `0` convention, then point `ftruncate` at the
wrapper. Place the wrapper after `#include "mingw-posix.h"` so the
`off_t` parameter resolves to the already-widened `off64_t` rather
than the 32-bit `_off_t` from `compat/vcbuild/include/unistd.h`.

MinGW is unaffected: its `ftruncate()` already takes `off_t` and
routes through `ftruncate64()` when `_FILE_OFFSET_BITS=64`, which is
the default in our build.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 compat/msvc-posix.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/compat/msvc-posix.h b/compat/msvc-posix.h
index c500b8b4aa..7ce39b8d3f 100644
--- a/compat/msvc-posix.h
+++ b/compat/msvc-posix.h
@@ -16,7 +16,6 @@
 #define __attribute__(x)
 #define strcasecmp   _stricmp
 #define strncasecmp  _strnicmp
-#define ftruncate    _chsize
 #define strtoull     _strtoui64
 #define strtoll      _strtoi64

@@ -30,4 +29,27 @@ typedef int sigset_t;

 #include "mingw-posix.h"

+/*
+ * MSVC's `_chsize()` takes a 32-bit `long` and silently truncates files
+ * to 2 GiB. `_chsize_s()` accepts a 64-bit length but returns 0 on
+ * success or an errno value on failure, rather than the -1/errno
+ * convention POSIX `ftruncate()` callers expect. Wrap it so callers
+ * that test the return value as `< 0` or against `-1` keep working.
+ *
+ * Note: this declaration must follow `#include "mingw-posix.h"` so
+ * `off_t` resolves to `off64_t` and the parameter type matches the
+ * underlying `_chsize_s()` width.
+ */
+static inline int msvc_ftruncate(int fd, off_t length)
+{
+	int err = _chsize_s(fd, length);
+
+	if (err) {
+		errno = err;
+		return -1;
+	}
+	return 0;
+}
+#define ftruncate msvc_ftruncate
+
 #endif /* COMPAT_MSVC_POSIX_H */
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/7] patch-delta: use size_t for sizes
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

`patch_delta()` takes the source and delta sizes by value and writes
back the reconstructed target size through an `unsigned long *`.  That
datatype cannot represent a value that exceeds 4 GiB on systems where
`unsigned long` is 32-bit (notably 64-bit Windows builds), though, even
though the delta encoding itself, the on-disk layout, and the in-memory
buffers happily carry such sizes. A `size_t` companion to
`get_delta_hdr_size()`, `get_delta_hdr_size_sz()`, was introduced in
17fa077596 (delta, packfile: use size_t for delta header sizes,
2026-05-08) precisely so that `patch_delta()` could be widened without
changing the on-the-wire decoding helper's signature.

Widen `patch_delta()`'s three size parameters to `size_t` and switch
its internal use of `get_delta_hdr_size()` to the `_sz` variant.
Then propagate the wider type through the callers.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 apply.c                  |  2 +-
 builtin/index-pack.c     |  4 ++--
 builtin/unpack-objects.c |  2 +-
 delta.h                  |  6 +++---
 packfile.c               |  4 +---
 patch-delta.c            | 12 ++++++------
 t/helper/test-delta.c    | 10 ++++++----
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/apply.c b/apply.c
index 249248d4f2..3cf544e9a9 100644
--- a/apply.c
+++ b/apply.c
@@ -3232,7 +3232,7 @@ static int apply_binary_fragment(struct apply_state *state,
 				 struct patch *patch)
 {
 	struct fragment *fragment = patch->fragments;
-	unsigned long len;
+	size_t len;
 	void *dst;
 
 	if (!fragment)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index cf0bd8280d..3c4474e681 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -71,7 +71,7 @@ struct base_data {
 	/* Not initialized by make_base(). */
 	struct list_head list;
 	void *data;
-	unsigned long size;
+	size_t size;
 };
 
 /*
@@ -1048,7 +1048,7 @@ static struct base_data *resolve_delta(struct object_entry *delta_obj,
 {
 	void *delta_data, *result_data;
 	struct base_data *result;
-	unsigned long result_size;
+	size_t result_size;
 
 	if (show_stat) {
 		int i = delta_obj - objects;
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 59e9b8711e..e7a50c493c 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -314,7 +314,7 @@ static void resolve_delta(unsigned nr, enum object_type type,
 			  void *delta, unsigned long delta_size)
 {
 	void *result;
-	unsigned long result_size;
+	size_t result_size;
 
 	result = patch_delta(base, base_size,
 			     delta, delta_size,
diff --git a/delta.h b/delta.h
index fad68cfc45..bb149dc82b 100644
--- a/delta.h
+++ b/delta.h
@@ -75,9 +75,9 @@ diff_delta(const void *src_buf, unsigned long src_bufsize,
  * *trg_bufsize is updated with its size.  On failure a NULL pointer is
  * returned.  The returned buffer must be freed by the caller.
  */
-void *patch_delta(const void *src_buf, unsigned long src_size,
-		  const void *delta_buf, unsigned long delta_size,
-		  unsigned long *dst_size);
+void *patch_delta(const void *src_buf, size_t src_size,
+		  const void *delta_buf, size_t delta_size,
+		  size_t *dst_size);
 
 /* the smallest possible delta size is 4 bytes */
 #define DELTA_SIZE_MIN	4
diff --git a/packfile.c b/packfile.c
index 89366abfe3..e202f48837 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1964,10 +1964,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			      (uintmax_t)curpos, p->pack_name);
 			data = NULL;
 		} else {
-			unsigned long sz;
 			data = patch_delta(base, base_size, delta_data,
-					   delta_size, &sz);
-			size = sz;
+					   delta_size, &size);
 
 			/*
 			 * We could not apply the delta; warn the user, but
diff --git a/patch-delta.c b/patch-delta.c
index b5c8594db6..44cda97994 100644
--- a/patch-delta.c
+++ b/patch-delta.c
@@ -12,13 +12,13 @@
 #include "git-compat-util.h"
 #include "delta.h"
 
-void *patch_delta(const void *src_buf, unsigned long src_size,
-		  const void *delta_buf, unsigned long delta_size,
-		  unsigned long *dst_size)
+void *patch_delta(const void *src_buf, size_t src_size,
+		  const void *delta_buf, size_t delta_size,
+		  size_t *dst_size)
 {
 	const unsigned char *data, *top;
 	unsigned char *dst_buf, *out, cmd;
-	unsigned long size;
+	size_t size;
 
 	if (delta_size < DELTA_SIZE_MIN)
 		return NULL;
@@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, unsigned long src_size,
 	top = (const unsigned char *) delta_buf + delta_size;
 
 	/* make sure the orig file size matches what we expect */
-	size = get_delta_hdr_size(&data, top);
+	size = get_delta_hdr_size_sz(&data, top);
 	if (size != src_size)
 		return NULL;
 
 	/* now the result size */
-	size = get_delta_hdr_size(&data, top);
+	size = get_delta_hdr_size_sz(&data, top);
 	dst_buf = xmallocz(size);
 
 	out = dst_buf;
diff --git a/t/helper/test-delta.c b/t/helper/test-delta.c
index 52ea00c937..8223a60229 100644
--- a/t/helper/test-delta.c
+++ b/t/helper/test-delta.c
@@ -21,7 +21,7 @@ int cmd__delta(int argc, const char **argv)
 	int fd;
 	struct strbuf from = STRBUF_INIT, data = STRBUF_INIT;
 	char *out_buf;
-	unsigned long out_size;
+	size_t out_size;
 
 	if (argc != 5 || (strcmp(argv[1], "-d") && strcmp(argv[1], "-p")))
 		usage(usage_str);
@@ -31,11 +31,13 @@ int cmd__delta(int argc, const char **argv)
 	if (strbuf_read_file(&data, argv[3], 0) < 0)
 		die_errno("unable to read '%s'", argv[3]);
 
-	if (argv[1][1] == 'd')
+	if (argv[1][1] == 'd') {
+		unsigned long delta_size;
 		out_buf = diff_delta(from.buf, from.len,
 				     data.buf, data.len,
-				     &out_size, 0);
-	else
+				     &delta_size, 0);
+		out_size = delta_size;
+	} else
 		out_buf = patch_delta(from.buf, from.len,
 				      data.buf, data.len,
 				      &out_size);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

`write_reuse_object()` learned to track its packed-object size as
`size_t` in 606c192380 (odb, packfile: use size_t for streaming
object sizes, 2026-05-08), but the comparison sink it feeds,
`check_pack_inflate()`, still takes the expected decompressed size
as `unsigned long`. The call site bridges the mismatch with
`cast_size_t_to_ulong()`, which on Windows turns a >4 GiB object
into an immediate die().

That function only uses `expect` once: as the right-hand side of a
`stream.total_out == expect` equality test against zlib's counter.
zlib's own `total_out` counter is `uLong` and is therefore still
32-bit-bound on Windows. Widening `expect` to `size_t` cannot fix that,
but it is a strict improvement nonetheless: instead of dying outright,
an oversized object now simply makes the equality fail and lets
`write_reuse_object()` fall back to `write_no_reuse_object()`, which
decompresses and re-deflates the content (and which the larger
pack-objects widening series targets separately).

Drop the `cast_size_t_to_ulong()` shim at the call site now that
the receiving parameter speaks the same type as `entry_size`.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/pack-objects.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index fe9fbecb30..975f04d699 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -453,7 +453,7 @@ static int check_pack_inflate(struct packed_git *p,
 		struct pack_window **w_curs,
 		off_t offset,
 		off_t len,
-		unsigned long expect)
+		size_t expect)
 {
 	git_zstream stream;
 	unsigned char fakebuf[4096], *in;
@@ -671,8 +671,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 	datalen -= entry->in_pack_header_size;

 	if (!pack_to_stdout && p->index_version == 1 &&
-	    check_pack_inflate(p, &w_curs, offset, datalen,
-			       cast_size_t_to_ulong(entry_size))) {
+	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
 		error(_("corrupt packed object for %s"),
 		      oid_to_hex(&entry->idx.oid));
 		unuse_pack(&w_curs);
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
                   ` (2 preceding siblings ...)
  2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

The topic `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects paths to `size_t` but deliberately stopped
at the in-memory `unpack_entry()` cascade, which still hands back the
unpacked size through `unsigned long *`.  On Windows that boundary
truncates above 4 GiB because that data type is only 32 bits wide on
that platform.

Widen the code path. Except `packed_object_info_with_index_pos()`: It
cannot yet pass `oi->sizep` directly because the field is still
`unsigned long *`; bridge it with a `size_t` temporary that narrows
back, and let a later commit drop the bridge once the field is wide
too. `gfi_unpack_entry()` keeps its narrow signature because fast-import
tracks sizes through `unsigned long` everywhere it crosses subsystem
boundaries, keeping its signature allows the scope of this commit to be
somewhat reasonable, still.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/fast-import.c |  7 ++++++-
 pack-check.c          |  5 ++---
 packfile.c            | 28 +++++++++++++++++-----------
 packfile.h            |  3 ++-
 4 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 82bc6dcc00..3dff898c43 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1239,6 +1239,8 @@ static void *gfi_unpack_entry(
 	unsigned long *sizep)
 {
 	enum object_type type;
+	size_t size_st = 0;
+	void *data;
 	struct packed_git *p = all_packs[oe->pack_id];
 	if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) {
 		/* The object is stored in the packfile we are writing to
@@ -1260,7 +1262,10 @@ static void *gfi_unpack_entry(
 		 */
 		p->pack_size = pack_size + the_hash_algo->rawsz;
 	}
-	return unpack_entry(the_repository, p, oe->idx.offset, &type, sizep);
+	data = unpack_entry(the_repository, p, oe->idx.offset, &type, &size_st);
+	if (sizep)
+		*sizep = cast_size_t_to_ulong(size_st);
+	return data;
 }
 
 static void load_tree(struct tree_entry *root)
diff --git a/pack-check.c b/pack-check.c
index 2792f34d25..5adfb3f272 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -143,9 +143,8 @@ static int verify_packfile(struct repository *r,
 			data = NULL;
 			data_valid = 0;
 		} else {
-			unsigned long sz;
-			data = unpack_entry(r, p, entries[i].offset, &type, &sz);
-			size = sz;
+			data = unpack_entry(r, p, entries[i].offset, &type,
+					    &size);
 			data_valid = 1;
 		}
 
diff --git a/packfile.c b/packfile.c
index e202f48837..dab0a9b16d 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1454,7 +1454,7 @@ struct delta_base_cache_entry {
 	struct delta_base_cache_key key;
 	struct list_head lru;
 	void *data;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 };
 
@@ -1525,7 +1525,7 @@ static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
 }
 
 static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
-				   off_t base_offset, unsigned long *base_size,
+				   off_t base_offset, size_t *base_size,
 				   enum object_type *type)
 {
 	struct delta_base_cache_entry *ent;
@@ -1558,8 +1558,8 @@ void clear_delta_base_cache(void)
 }
 
 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
-				 void *base, unsigned long base_size,
-				 unsigned long delta_base_cache_limit,
+				 void *base, size_t base_size,
+				 size_t delta_base_cache_limit,
 				 enum object_type type)
 {
 	struct delta_base_cache_entry *ent;
@@ -1614,10 +1614,13 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 	 * a "real" type later if the caller is interested.
 	 */
 	if (oi->contentp) {
-		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep,
-						      &type);
+		size_t size_st = 0;
+		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
+						      &size_st, &type);
 		if (!*oi->contentp)
 			type = OBJ_BAD;
+		else if (oi->sizep)
+			*oi->sizep = cast_size_t_to_ulong(size_st);
 	} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
 		type = unpack_object_header(p, &w_curs, &curpos, &size);
 	}
@@ -1735,7 +1738,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset,
 static void *unpack_compressed_entry(struct packed_git *p,
 				    struct pack_window **w_curs,
 				    off_t curpos,
-				    unsigned long size)
+				    size_t size)
 {
 	int st;
 	git_zstream stream;
@@ -1790,11 +1793,11 @@ int do_check_packed_object_crc;
 struct unpack_entry_stack_ent {
 	off_t obj_offset;
 	off_t curpos;
-	unsigned long size;
+	size_t size;
 };
 
 void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
-		   enum object_type *final_type, unsigned long *final_size)
+		   enum object_type *final_type, size_t *final_size)
 {
 	struct pack_window *w_curs = NULL;
 	off_t curpos = obj_offset;
@@ -1911,7 +1914,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 		void *delta_data;
 		void *base = data;
 		void *external_base = NULL;
-		unsigned long delta_size, base_size = size;
+		size_t delta_size, base_size = size;
 		int i;
 		off_t base_obj_offset = obj_offset;
 
@@ -1928,6 +1931,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			struct object_id base_oid;
 			if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
 				struct object_info oi = OBJECT_INFO_INIT;
+				unsigned long bsz_ul = 0;
 
 				nth_packed_object_id(&base_oid, p,
 						     pack_pos_to_index(p, pos));
@@ -1938,11 +1942,13 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 				mark_bad_packed_object(p, &base_oid);
 
 				oi.typep = &type;
-				oi.sizep = &base_size;
+				oi.sizep = &bsz_ul;
 				oi.contentp = &base;
 				if (odb_read_object_info_extended(r->objects, &base_oid,
 								  &oi, 0) < 0)
 					base = NULL;
+				else
+					base_size = bsz_ul;
 
 				external_base = base;
 			}
diff --git a/packfile.h b/packfile.h
index 49d6bdecf6..0b5ae3f9fc 100644
--- a/packfile.h
+++ b/packfile.h
@@ -455,7 +455,8 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
 off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *);
 
 int is_pack_valid(struct packed_git *);
-void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
+void *unpack_entry(struct repository *r, struct packed_git *, off_t,
+		   enum object_type *, size_t *);
 unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
 unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/7] pack-objects: use size_t for in-core object sizes
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
                   ` (3 preceding siblings ...)
  2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

`pack-objects` stores per-entry object sizes in either the 31-bit
`size_` member of the `struct object_entry` or, when the value does not
fit, the `pack->delta_size[]` spill array.  The accessors (`oe_size`,
`oe_delta_size`, `oe_get_size_slow`, `oe_size_*_than`) and the setters
(`oe_set_size`, `oe_set_delta_size`) used `unsigned long` for the spill
type, which on Windows means the spill silently caps at 4 GiB per entry.
That is what made `upload-pack` die with "object too large to read on
this platform" when serving the >4 GiB blob in `t5608` tests 5 and 6
when run with `GIT_TEST_CLONE_2GB`.

Widen them all to `size_t` (including `pack->delta_size`) and drop the
three `cast_size_t_to_ulong()` calls in `check_object()` that guarded
`in_pack_size`.  The two `SET_SIZE(entry, canonical_size)` calls in the
same function stay cast-free as before, since `canonical_size` is still
`unsigned long` until a later commit widens `object_info::sizep`.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 builtin/pack-objects.c | 35 ++++++++++++++++++-----------------
 pack-objects.h         |  2 +-
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 975f04d699..bb372d0b03 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -66,8 +66,8 @@ static inline struct object_entry *oe_delta(
 		return &pack->objects[e->delta_idx - 1];
 }
 
-static inline unsigned long oe_delta_size(struct packing_data *pack,
-					  const struct object_entry *e)
+static inline size_t oe_delta_size(struct packing_data *pack,
+				   const struct object_entry *e)
 {
 	if (e->delta_size_valid)
 		return e->delta_size_;
@@ -83,11 +83,11 @@ static inline unsigned long oe_delta_size(struct packing_data *pack,
 	return pack->delta_size[e - pack->objects];
 }
 
-unsigned long oe_get_size_slow(struct packing_data *pack,
-			       const struct object_entry *e);
+size_t oe_get_size_slow(struct packing_data *pack,
+			const struct object_entry *e);
 
-static inline unsigned long oe_size(struct packing_data *pack,
-				    const struct object_entry *e)
+static inline size_t oe_size(struct packing_data *pack,
+			     const struct object_entry *e)
 {
 	if (e->size_valid)
 		return e->size_;
@@ -145,7 +145,7 @@ static inline void oe_set_delta_sibling(struct packing_data *pack,
 
 static inline void oe_set_size(struct packing_data *pack,
 			       struct object_entry *e,
-			       unsigned long size)
+			       size_t size)
 {
 	if (size < pack->oe_size_limit) {
 		e->size_ = size;
@@ -159,7 +159,7 @@ static inline void oe_set_size(struct packing_data *pack,
 
 static inline void oe_set_delta_size(struct packing_data *pack,
 				     struct object_entry *e,
-				     unsigned long size)
+				     size_t size)
 {
 	if (size < pack->oe_delta_size_limit) {
 		e->delta_size_ = size;
@@ -496,7 +496,7 @@ static void copy_pack_data(struct hashfile *f,
 
 static inline int oe_size_greater_than(struct packing_data *pack,
 				       const struct object_entry *lhs,
-				       unsigned long rhs)
+				       size_t rhs)
 {
 	if (lhs->size_valid)
 		return lhs->size_ > rhs;
@@ -2277,7 +2277,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 		default:
 			/* Not a delta hence we've already got all we need. */
 			oe_set_type(entry, entry->in_pack_type);
-			SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size));
+			SET_SIZE(entry, in_pack_size);
 			entry->in_pack_header_size = used;
 			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
 				goto give_up;
@@ -2331,8 +2331,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 		if (have_base &&
 		    can_reuse_delta(&base_ref, entry, &base_entry)) {
 			oe_set_type(entry, entry->in_pack_type);
-			SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size)); /* delta size */
-			SET_DELTA_SIZE(entry, cast_size_t_to_ulong(in_pack_size));
+			SET_SIZE(entry, in_pack_size); /* delta size */
+			SET_DELTA_SIZE(entry, in_pack_size);
 
 			if (base_entry) {
 				SET_DELTA(entry, base_entry);
@@ -2355,7 +2355,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
 			 * object size from the delta header.
 			 */
 			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
-			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
+			canonical_size = get_size_from_delta(p, &w_curs,
+							     delta_pos);
 			if (canonical_size == 0)
 				goto give_up;
 			SET_SIZE(entry, canonical_size);
@@ -2711,7 +2712,7 @@ static pthread_mutex_t progress_mutex;
 
 static inline int oe_size_less_than(struct packing_data *pack,
 				    const struct object_entry *lhs,
-				    unsigned long rhs)
+				    size_t rhs)
 {
 	if (lhs->size_valid)
 		return lhs->size_ < rhs;
@@ -2734,8 +2735,8 @@ static inline void oe_set_tree_depth(struct packing_data *pack,
  * reconstruction (so non-deltas are true object sizes, but deltas
  * return the size of the delta data).
  */
-unsigned long oe_get_size_slow(struct packing_data *pack,
-			       const struct object_entry *e)
+size_t oe_get_size_slow(struct packing_data *pack,
+			const struct object_entry *e)
 {
 	struct packed_git *p;
 	struct pack_window *w_curs;
@@ -2769,7 +2770,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
 
 	unuse_pack(&w_curs);
 	packing_data_unlock(&to_pack);
-	return cast_size_t_to_ulong(size);
+	return size;
 }
 
 static int try_delta(struct unpacked *trg, struct unpacked *src,
diff --git a/pack-objects.h b/pack-objects.h
index 83299d4732..e97e84ddcb 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -141,7 +141,7 @@ struct packing_data {
 	uint32_t index_size;
 
 	unsigned int *in_pack_pos;
-	unsigned long *delta_size;
+	size_t *delta_size;
 
 	/*
 	 * Only one of these can be non-NULL and they have different
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
                   ` (4 preceding siblings ...)
  2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When I started the transition from `unsigned long` to `size_t`, in the
interest of keeping the patches reviewable, I introduced these calls to
prevent data type narrowing from silently failing to handle large object
sizes. I also introduced `*_sz()` variants that would allow most of the
callers to keep using that `unsigned long` that the 90s kindly asked to
be returned.

After the preceding commits, the only places that called the narrow
wrappers either no longer exist or already use the `_sz` form
internally, so the wrappers just narrow values back through
`cast_size_t_to_ulong()` for no reason.

Drop them and rename the `_sz` variants back to the natural names.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 delta.h       | 14 ++------------
 packfile.c    | 28 ++++++++--------------------
 packfile.h    |  2 +-
 patch-delta.c |  4 ++--
 4 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/delta.h b/delta.h
index bb149dc82b..eb5c6d2fdb 100644
--- a/delta.h
+++ b/delta.h
@@ -86,11 +86,8 @@ void *patch_delta(const void *src_buf, size_t src_size,
  * This must be called twice on the delta data buffer, first to get the
  * expected source buffer size, and again to get the target buffer size.
  */
-/*
- * Size_t variant that doesn't truncate - use for >4GB objects on Windows.
- */
-static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
-					   const unsigned char *top)
+static inline size_t get_delta_hdr_size(const unsigned char **datap,
+					const unsigned char *top)
 {
 	const unsigned char *data = *datap;
 	size_t cmd, size = 0;
@@ -104,11 +101,4 @@ static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
 	return size;
 }
 
-static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
-					       const unsigned char *top)
-{
-	size_t size = get_delta_hdr_size_sz(datap, top);
-	return cast_size_t_to_ulong(size);
-}
-
 #endif
diff --git a/packfile.c b/packfile.c
index dab0a9b16d..c174982d10 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1164,11 +1164,12 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
 }
 
 /*
- * Size_t variant for >4GB delta results on Windows.
+ * Read a delta object's header at curpos in p (already inflated as needed)
+ * and return the size of the result object (the post-application target).
  */
-static size_t get_size_from_delta_sz(struct packed_git *p,
-				     struct pack_window **w_curs,
-				     off_t curpos)
+size_t get_size_from_delta(struct packed_git *p,
+			   struct pack_window **w_curs,
+			   off_t curpos)
 {
 	const unsigned char *data;
 	unsigned char delta_head[20], *in;
@@ -1215,18 +1216,10 @@ static size_t get_size_from_delta_sz(struct packed_git *p,
 	data = delta_head;
 
 	/* ignore base size */
-	get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
+	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 
 	/* Read the result size */
-	return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
-}
-
-unsigned long get_size_from_delta(struct packed_git *p,
-				  struct pack_window **w_curs,
-				  off_t curpos)
-{
-	size_t size = get_size_from_delta_sz(p, w_curs, curpos);
-	return cast_size_t_to_ulong(size);
+	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 }
 
 int unpack_object_header(struct packed_git *p,
@@ -1634,12 +1627,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 				ret = -1;
 				goto out;
 			}
-			/*
-			 * Use size_t variant to avoid die() on >4GB deltas.
-			 * oi->sizep is unsigned long, so truncation may occur,
-			 * but streaming code uses its own size_t tracking.
-			 */
-			size = get_size_from_delta_sz(p, &w_curs, tmp_pos);
+			size = get_size_from_delta(p, &w_curs, tmp_pos);
 			if (size == 0) {
 				ret = -1;
 				goto out;
diff --git a/packfile.h b/packfile.h
index 0b5ae3f9fc..bd4494906d 100644
--- a/packfile.h
+++ b/packfile.h
@@ -458,7 +458,7 @@ int is_pack_valid(struct packed_git *);
 void *unpack_entry(struct repository *r, struct packed_git *, off_t,
 		   enum object_type *, size_t *);
 unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
-unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
+size_t get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
 off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
 		     off_t *curpos, enum object_type type,
diff --git a/patch-delta.c b/patch-delta.c
index 44cda97994..42199fa956 100644
--- a/patch-delta.c
+++ b/patch-delta.c
@@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, size_t src_size,
 	top = (const unsigned char *) delta_buf + delta_size;
 
 	/* make sure the orig file size matches what we expect */
-	size = get_delta_hdr_size_sz(&data, top);
+	size = get_delta_hdr_size(&data, top);
 	if (size != src_size)
 		return NULL;
 
 	/* now the result size */
-	size = get_delta_hdr_size_sz(&data, top);
+	size = get_delta_hdr_size(&data, top);
 	dst_buf = xmallocz(size);
 
 	out = dst_buf;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs
  2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
                   ` (5 preceding siblings ...)
  2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
@ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget
  6 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects code paths, in the interest of keeping the
patches somewhat reasonably-sized, it left the public ODB API still
typed in `unsigned long`. In particular `struct object_info::sizep` and
the four wrappers built on top of it (`odb_read_object`,
`odb_read_object_peeled`, `odb_read_object_info`, `odb_pretend_object`)
still return the unpacked size through `unsigned long *`, so on Windows
`cat-file -s` and the `git add` / `git status` paths for a >4 GiB blob
silently cap at 4 GiB.

Widen the field and the four wrappers. The previous commits already
widened the `unpack_entry()` cascade and pack-objects' in-core size
accessors, so most of the cascade arrives here with no further work: the
temporary shims in `packed_object_info_with_index_pos()` and in
`unpack_entry()`'s delta-base recovery path go away, the two
`SET_SIZE(entry, cast_size_t_to_ulong(canonical_size))` calls in
`check_object()` and the matching one in `drop_reused_delta()` collapse
to plain `SET_SIZE`, and `oe_get_size_slow()`'s tail
`cast_size_t_to_ulong()` is gone too.

What remains narrow are the boundaries this series does not
intend to touch: the diff, blame, textconv and fast-import machinery.

Even so, this patch is unfortunately quite large.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 apply.c                       |  6 +++---
 archive.c                     |  4 ++--
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 ++++++++++----
 builtin/cat-file.c            | 39 +++++++++++++++++++++--------------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 +++++--
 builtin/fast-import.c         | 22 ++++++++++++++------
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++++------
 builtin/index-pack.c          |  6 +++---
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 ++--
 builtin/merge-tree.c          |  6 +++---
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 +++---
 builtin/pack-objects.c        | 33 ++++++++++++++++++++---------
 builtin/repo.c                |  4 +++-
 builtin/tag.c                 |  4 ++--
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  6 ++++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 +++-
 commit.c                      | 10 ++++-----
 config.c                      |  2 +-
 diff.c                        |  5 ++++-
 dir.c                         |  2 +-
 entry.c                       |  4 +---
 fmt-merge-msg.c               |  4 ++--
 fsck.c                        |  2 +-
 grep.c                        |  4 +++-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 ++--
 merge-blobs.c                 |  6 +++---
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 ++++---
 object-file.c                 | 18 +++++-----------
 object.c                      |  2 +-
 odb.c                         | 12 +++++------
 odb.h                         | 10 ++++-----
 odb/streaming.c               | 13 +-----------
 pack-bitmap.c                 |  4 ++--
 packfile.c                    | 12 +++--------
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 +++--
 read-cache.c                  |  6 +++---
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-pack-deltas.c   |  3 ++-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 ++--
 tree-walk.c                   | 10 +++++----
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 64 files changed, 205 insertions(+), 173 deletions(-)

diff --git a/apply.c b/apply.c
index 3cf544e9a9..5e54453f79 100644
--- a/apply.c
+++ b/apply.c
@@ -3321,7 +3321,7 @@ static int apply_binary(struct apply_state *state,
 	if (odb_has_object(the_repository->objects, &oid, 0)) {
 		/* We already have the postimage */
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *result;
 
 		result = odb_read_object(the_repository->objects, &oid,
@@ -3384,7 +3384,7 @@ static int read_blob_object(struct strbuf *buf, const struct object_id *oid, uns
 		strbuf_addf(buf, "Subproject commit %s\n", oid_to_hex(oid));
 	} else {
 		enum object_type type;
-		unsigned long sz;
+		size_t sz;
 		char *result;
 
 		result = odb_read_object(the_repository->objects, oid,
@@ -3611,7 +3611,7 @@ static int load_preimage(struct apply_state *state,
 
 static int resolve_to(struct image *image, const struct object_id *result_id)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *data;
 
diff --git a/archive.c b/archive.c
index 51229107a5..59790be986 100644
--- a/archive.c
+++ b/archive.c
@@ -87,7 +87,7 @@ static void *object_file_to_archive(const struct archiver_args *args,
 				    const struct object_id *oid,
 				    unsigned int mode,
 				    enum object_type *type,
-				    unsigned long *sizep)
+				    size_t *sizep)
 {
 	void *buffer;
 	const struct commit *commit = args->convert ? args->commit : NULL;
@@ -158,7 +158,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	write_archive_entry_fn_t write_entry = c->write_entry;
 	int err;
 	const char *path_without_prefix;
-	unsigned long size;
+	size_t size;
 	void *buffer;
 	enum object_type type;
 
diff --git a/attr.c b/attr.c
index 75369547b3..c61472a4e6 100644
--- a/attr.c
+++ b/attr.c
@@ -768,7 +768,7 @@ static struct attr_stack *read_attr_from_blob(struct index_state *istate,
 					      const char *path, unsigned flags)
 {
 	struct object_id oid;
-	unsigned long sz;
+	size_t sz;
 	enum object_type type;
 	void *buf;
 	unsigned short mode;
diff --git a/bisect.c b/bisect.c
index 905a9afb05..4742a5fef4 100644
--- a/bisect.c
+++ b/bisect.c
@@ -154,7 +154,7 @@ static void show_list(const char *debug, int counted, int nr,
 		struct commit *commit = p->item;
 		unsigned commit_flags = commit->object.flags;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object(the_repository->objects,
 					    &commit->object.oid, &type,
 					    &size);
diff --git a/blame.c b/blame.c
index 977cbb7097..126e232416 100644
--- a/blame.c
+++ b/blame.c
@@ -1041,10 +1041,13 @@ static void fill_origin_blob(struct diff_options *opt,
 		    textconv_object(opt->repo, o->path, o->mode,
 				    &o->blob_oid, 1, &file->ptr, &file_size))
 			;
-		else
+		else {
+			size_t file_size_st = 0;
 			file->ptr = odb_read_object(the_repository->objects,
 						    &o->blob_oid, &type,
-						    &file_size);
+						    &file_size_st);
+			file_size = cast_size_t_to_ulong(file_size_st);
+		}
 		file->size = file_size;
 
 		if (!file->ptr)
@@ -2869,10 +2872,14 @@ void setup_scoreboard(struct blame_scoreboard *sb,
 		    textconv_object(sb->repo, sb->path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf,
 				    &sb->final_buf_size))
 			;
-		else
+		else {
+			size_t final_buf_size_st = 0;
 			sb->final_buf = odb_read_object(the_repository->objects,
 							&o->blob_oid, &type,
-							&sb->final_buf_size);
+							&final_buf_size_st);
+			sb->final_buf_size =
+				cast_size_t_to_ulong(final_buf_size_st);
+		}
 
 		if (!sb->final_buf)
 			die(_("cannot read blob %s for path %s"),
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index fa45f774d7..fa6e396ddc 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -84,7 +84,7 @@ static char *replace_idents_using_mailmap(char *object_buf, size_t *size)
 
 static int filter_object(const char *path, unsigned mode,
 			 const struct object_id *oid,
-			 char **buf, unsigned long *size)
+			 char **buf, size_t *size)
 {
 	enum object_type type;
 
@@ -120,7 +120,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	struct object_id oid;
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	struct object_context obj_context = {0};
 	struct object_info oi = OBJECT_INFO_INIT;
 	unsigned flags = OBJECT_INFO_LOOKUP_REPLACE;
@@ -166,7 +166,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) {
 			size_t s = size;
 			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 
 		printf("%"PRIuMAX"\n", (uintmax_t)size);
@@ -188,9 +188,15 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		break;
 
 	case 'c':
-		if (textconv_object(the_repository, path, obj_context.mode,
-				    &oid, 1, &buf, &size))
+	{
+		unsigned long size_ul = 0;
+		int textconv_ret = textconv_object(the_repository, path,
+						   obj_context.mode, &oid, 1,
+						   &buf, &size_ul);
+		size = size_ul;
+		if (textconv_ret)
 			break;
+	}
 		/* else fallthrough */
 
 	case 'p':
@@ -219,7 +225,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (use_mailmap) {
 			size_t s = size;
 			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 
 		/* otherwise just spit out the data */
@@ -266,7 +272,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (use_mailmap) {
 			size_t s = size;
 			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 		break;
 	}
@@ -288,7 +294,7 @@ cleanup:
 struct expand_data {
 	struct object_id oid;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	unsigned short mode;
 	off_t disk_size;
 	const char *rest;
@@ -405,7 +411,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 			fflush(stdout);
 		if (opt->transform_mode) {
 			char *contents;
-			unsigned long size;
+			size_t size;
 
 			if (!data->rest)
 				die("missing path for '%s'", oid_to_hex(oid));
@@ -417,9 +423,12 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 					    oid_to_hex(oid), data->rest);
 			} else if (opt->transform_mode == 'c') {
 				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
+				unsigned long size_ul = 0;
+				if (textconv_object(the_repository,
+						    data->rest, 0100644, oid,
+						    1, &contents, &size_ul))
+					size = size_ul;
+				else
 					contents = odb_read_object(the_repository->objects,
 								   oid, &type, &size);
 				if (!contents)
@@ -435,7 +444,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 	}
 	else {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		void *contents;
 
 		contents = odb_read_object(the_repository->objects, oid,
@@ -446,7 +455,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 		if (use_mailmap) {
 			size_t s = size;
 			contents = replace_idents_using_mailmap(contents, &s);
-			size = cast_size_t_to_ulong(s);
+			size = s;
 		}
 
 		if (type != data->type)
@@ -555,7 +564,7 @@ static void batch_object_write(const char *obj_name,
 			if (!buf)
 				die(_("unable to read %s"), oid_to_hex(&data->oid));
 			buf = replace_idents_using_mailmap(buf, &s);
-			data->size = cast_size_t_to_ulong(s);
+			data->size = s;
 
 			free(buf);
 		}
diff --git a/builtin/difftool.c b/builtin/difftool.c
index 2a21005f2e..26778f8515 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -319,7 +319,7 @@ static char *get_symlink(struct repository *repo,
 		data = strbuf_detach(&link, NULL);
 	} else {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		data = odb_read_object(repo->objects, oid, &type, &size);
 		if (!data)
 			die(_("could not read object %s for symlink %s"),
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 2eb43a28da..0be43104dc 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -317,7 +317,10 @@ static void export_blob(const struct object_id *oid)
 		object = (struct object *)lookup_blob(the_repository, oid);
 		eaten = 0;
 	} else {
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 		if (!buf)
 			die(_("could not read blob %s"), oid_to_hex(oid));
 		if (check_object_signature(the_repository, oid, buf, size,
@@ -880,7 +883,7 @@ static char *anonymize_tag(void)
 
 static void handle_tag(const char *name, struct tag *tag)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf;
 	const char *tagger, *tagger_end, *message;
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3dff898c43..d11a2cc2c1 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1291,7 +1291,10 @@ static void load_tree(struct tree_entry *root)
 			die(_("can't load tree %s"), oid_to_hex(oid));
 	} else {
 		enum object_type type;
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 		if (!buf || type != OBJ_TREE)
 			die(_("can't load tree %s"), oid_to_hex(oid));
 	}
@@ -2560,7 +2563,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 			die(_("mark :%" PRIuMAX " not a commit"), commit_mark);
 		oidcpy(&commit_oid, &commit_oe->idx.oid);
 	} else if (!repo_get_oid(the_repository, p, &commit_oid)) {
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object_peeled(the_repository->objects,
 						   &commit_oid, OBJ_COMMIT, &size,
 						   &commit_oid);
@@ -2627,10 +2630,12 @@ static void parse_from_existing(struct branch *b)
 		oidclr(&b->branch_tree.versions[1].oid, the_repository->hash_algo);
 	} else {
 		unsigned long size;
+		size_t size_st = 0;
 		char *buf;
 
 		buf = odb_read_object_peeled(the_repository->objects, &b->oid,
-					     OBJ_COMMIT, &size, &b->oid);
+					     OBJ_COMMIT, &size_st, &b->oid);
+		size = cast_size_t_to_ulong(size_st);
 		parse_from_commit(b, buf, size);
 		free(buf);
 	}
@@ -2722,7 +2727,7 @@ static struct hash_list *parse_merge(unsigned int *count)
 				die(_("mark :%" PRIuMAX " not a commit"), idnum);
 			oidcpy(&n->oid, &oe->idx.oid);
 		} else if (!repo_get_oid(the_repository, from, &n->oid)) {
-			unsigned long size;
+			size_t size;
 			char *buf = odb_read_object_peeled(the_repository->objects,
 							   &n->oid, OBJ_COMMIT,
 							   &size, &n->oid);
@@ -3330,7 +3335,10 @@ static void cat_blob(struct object_entry *oe, struct object_id *oid)
 	char *buf;
 
 	if (!oe || oe->pack_id == MAX_PACK_ID) {
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 	} else {
 		type = oe->type;
 		buf = gfi_unpack_entry(oe, &size);
@@ -3438,8 +3446,10 @@ static struct object_entry *dereference(struct object_entry *oe,
 		buf = gfi_unpack_entry(oe, &size);
 	} else {
 		enum object_type unused;
+		size_t size_st = 0;
 		buf = odb_read_object(the_repository->objects, oid,
-				      &unused, &size);
+				      &unused, &size_st);
+		size = cast_size_t_to_ulong(size_st);
 	}
 	if (!buf)
 		die(_("can't load object %s"), oid_to_hex(oid));
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 248f8ff5a0..76b723f36d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -724,7 +724,7 @@ static int fsck_loose(const struct object_id *oid, const char *path,
 	struct for_each_loose_cb *data = cb_data;
 	struct object *obj;
 	enum object_type type = OBJ_NONE;
-	unsigned long size;
+	size_t size;
 	void *contents = NULL;
 	int eaten;
 	struct object_info oi = OBJECT_INFO_INIT;
diff --git a/builtin/grep.c b/builtin/grep.c
index 6a09571903..26b85479ca 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -520,7 +520,7 @@ static int grep_submodule(struct grep_opt *opt,
 		enum object_type object_type;
 		struct tree_desc tree;
 		void *data;
-		unsigned long size;
+		size_t size;
 		struct strbuf base = STRBUF_INIT;
 
 		obj_read_lock();
@@ -573,7 +573,7 @@ static int grep_cache(struct grep_opt *opt,
 			enum object_type type;
 			struct tree_desc tree;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(the_repository->objects, &ce->oid,
 					       &type, &size);
@@ -666,7 +666,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 			enum object_type type;
 			struct tree_desc sub;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(the_repository->objects,
 					       &entry.oid, &type, &size);
@@ -730,7 +730,7 @@ static void collect_blob_oids_for_tree(struct repository *repo,
 			enum object_type type;
 			struct tree_desc sub_tree;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(repo->objects, &entry.oid,
 					       &type, &size);
@@ -764,7 +764,7 @@ static void collect_blob_oids_for_treeish(struct grep_opt *opt,
 {
 	struct tree_desc tree;
 	void *data;
-	unsigned long size;
+	size_t size;
 	struct strbuf base = STRBUF_INIT;
 	int len;
 
@@ -841,7 +841,7 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
 	if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
 		struct tree_desc tree;
 		void *data;
-		unsigned long size;
+		size_t size;
 		struct strbuf base;
 		int hit, len;
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 3c4474e681..78da3a6566 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -258,7 +258,7 @@ static unsigned check_object(struct object *obj)
 		return 0;
 
 	if (!(obj->flags & FLAG_CHECKED)) {
-		unsigned long size;
+		size_t size;
 		int type = odb_read_object_info(the_repository->objects,
 						&obj->oid, &size);
 		if (type <= 0)
@@ -905,7 +905,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
 	if (collision_test_needed) {
 		void *has_data;
 		enum object_type has_type;
-		unsigned long has_size;
+		size_t has_size;
 		read_lock();
 		has_type = odb_read_object_info(the_repository->objects, oid, &has_size);
 		if (has_type < 0)
@@ -1515,7 +1515,7 @@ static void fix_unresolved_deltas(struct hashfile *f)
 		struct ref_delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
 		void *data;
-		unsigned long size;
+		size_t size;
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
diff --git a/builtin/log.c b/builtin/log.c
index e464b30af4..d027ce1e0b 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -613,7 +613,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 
 static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = odb_read_object(the_repository->objects, oid, &type, &size);
 	unsigned long offset = 0;
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index e1a22b41b9..bfbd145e97 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -251,7 +251,7 @@ static void expand_objectsize(struct repository *repo, struct strbuf *line,
 			      const enum object_type type, unsigned int padded)
 {
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(repo->objects, oid, &size) < 0)
 			die(_("could not get object info about '%s'"),
 			    oid_to_hex(oid));
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 113e4a960d..7d075bfca2 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -27,7 +27,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
 			      const enum object_type type, unsigned int padded)
 {
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(the_repository->objects, oid, &size) < 0)
 			die(_("could not get object info about '%s'"),
 			    oid_to_hex(oid));
@@ -217,7 +217,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 		return early;
 
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(the_repository->objects, oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index 312b595d1e..49f41e520f 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -69,7 +69,7 @@ static const char *explanation(struct merge_list *entry)
 	return "removed in remote";
 }
 
-static void *result(struct merge_list *entry, unsigned long *size)
+static void *result(struct merge_list *entry, size_t *size)
 {
 	enum object_type type;
 	struct blob *base, *our, *their;
@@ -96,7 +96,7 @@ static void *result(struct merge_list *entry, unsigned long *size)
 			   base, our, their, size);
 }
 
-static void *origin(struct merge_list *entry, unsigned long *size)
+static void *origin(struct merge_list *entry, size_t *size)
 {
 	enum object_type type;
 	while (entry) {
@@ -119,7 +119,7 @@ static int show_outf(void *priv UNUSED, mmbuffer_t *mb, int nbuf)
 
 static void show_diff(struct merge_list *entry)
 {
-	unsigned long size;
+	size_t size;
 	mmfile_t src, dst;
 	xpparam_t xpp;
 	xdemitconf_t xecfg;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index f40264a878..37c17e6beb 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -50,7 +50,7 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type)
 {
 	int ret;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	void *buffer;
 	const struct object_id *repl;
 
diff --git a/builtin/notes.c b/builtin/notes.c
index 9af602bdd7..962df867c8 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -150,7 +150,7 @@ static int list_each_note(const struct object_id *object_oid,
 
 static void copy_obj_to_fd(int fd, const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = odb_read_object(the_repository->objects, oid, &type, &size);
 	if (buf) {
@@ -313,7 +313,7 @@ static int parse_reuse_arg(const struct option *opt, const char *arg, int unset)
 	char *value;
 	struct object_id object;
 	enum object_type type;
-	unsigned long len;
+	size_t len;
 
 	BUG_ON_OPT_NEG(unset);
 
@@ -721,7 +721,7 @@ static int append_edit(int argc, const char **argv, const char *prefix,
 
 	if (note && !edit) {
 		/* Append buf to previous note contents */
-		unsigned long size;
+		size_t size;
 		enum object_type type;
 		struct strbuf buf = STRBUF_INIT;
 		char *prev_buf = odb_read_object(the_repository->objects, note, &type, &size);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index bb372d0b03..6202fe4dca 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -356,14 +356,17 @@ static void *get_delta(struct object_entry *entry)
 	unsigned long size, base_size, delta_size;
 	void *buf, *base_buf, *delta_buf;
 	enum object_type type;
+	size_t size_st = 0, base_size_st = 0;
 
 	buf = odb_read_object(the_repository->objects, &entry->idx.oid,
-			      &type, &size);
+			      &type, &size_st);
+	size = cast_size_t_to_ulong(size_st);
 	if (!buf)
 		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
 	base_buf = odb_read_object(the_repository->objects,
 				   &DELTA(entry)->idx.oid, &type,
-				   &base_size);
+				   &base_size_st);
+	base_size = cast_size_t_to_ulong(base_size_st);
 	if (!base_buf)
 		die("unable to read %s",
 		    oid_to_hex(&DELTA(entry)->idx.oid));
@@ -528,9 +531,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 			type = st->type;
 			size = st->size;
 		} else {
+			size_t size_st = 0;
 			buf = odb_read_object(the_repository->objects,
 					      &entry->idx.oid, &type,
-					      &size);
+					      &size_st);
+			size = cast_size_t_to_ulong(size_st);
 			if (!buf)
 				die(_("unable to read %s"),
 				    oid_to_hex(&entry->idx.oid));
@@ -1935,6 +1940,7 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	struct pbase_tree_cache *ent, *nent;
 	void *data;
 	unsigned long size;
+	size_t size_st = 0;
 	enum object_type type;
 	int neigh;
 	int my_ix = pbase_tree_cache_ix(oid);
@@ -1962,7 +1968,8 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	/* Did not find one.  Either we got a bogus request or
 	 * we need to read and perhaps cache.
 	 */
-	data = odb_read_object(the_repository->objects, oid, &type, &size);
+	data = odb_read_object(the_repository->objects, oid, &type, &size_st);
+	size = cast_size_t_to_ulong(size_st);
 	if (!data)
 		return NULL;
 	if (type != OBJ_TREE) {
@@ -2117,13 +2124,15 @@ static void add_preferred_base(struct object_id *oid)
 	struct pbase_tree *it;
 	void *data;
 	unsigned long size;
+	size_t size_st = 0;
 	struct object_id tree_oid;
 
 	if (window <= num_preferred_base++)
 		return;
 
 	data = odb_read_object_peeled(the_repository->objects, oid,
-				      OBJ_TREE, &size, &tree_oid);
+				      OBJ_TREE, &size_st, &tree_oid);
+	size = cast_size_t_to_ulong(size_st);
 	if (!data)
 		return;
 
@@ -2235,7 +2244,7 @@ static void prefetch_to_pack(uint32_t object_index_start) {
 
 static void check_object(struct object_entry *entry, uint32_t object_index)
 {
-	unsigned long canonical_size;
+	size_t canonical_size;
 	enum object_type type;
 	struct object_info oi = {.typep = &type, .sizep = &canonical_size};
 
@@ -2434,7 +2443,7 @@ static void drop_reused_delta(struct object_entry *entry)
 	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
 	struct object_info oi = OBJECT_INFO_INIT;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 
 	while (*idx) {
 		struct object_entry *oe = &to_pack.objects[*idx - 1];
@@ -2746,7 +2755,7 @@ size_t oe_get_size_slow(struct packing_data *pack,
 	size_t size;
 
 	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
-		unsigned long sz;
+		size_t sz;
 		packing_data_lock(&to_pack);
 		if (odb_read_object_info(the_repository->objects,
 					 &e->idx.oid, &sz) < 0)
@@ -2831,10 +2840,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 
 	/* Load data if not already done */
 	if (!trg->data) {
+		size_t sz_st = 0;
 		packing_data_lock(&to_pack);
 		trg->data = odb_read_object(the_repository->objects,
 					    &trg_entry->idx.oid, &type,
-					    &sz);
+					    &sz_st);
+		sz = cast_size_t_to_ulong(sz_st);
 		packing_data_unlock(&to_pack);
 		if (!trg->data)
 			die(_("object %s cannot be read"),
@@ -2846,10 +2857,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		*mem_usage += sz;
 	}
 	if (!src->data) {
+		size_t sz_st = 0;
 		packing_data_lock(&to_pack);
 		src->data = odb_read_object(the_repository->objects,
 					    &src_entry->idx.oid, &type,
-					    &sz);
+					    &sz_st);
+		sz = cast_size_t_to_ulong(sz_st);
 		packing_data_unlock(&to_pack);
 		if (!src->data) {
 			if (src_entry->preferred_base) {
diff --git a/builtin/repo.c b/builtin/repo.c
index 71a5c1c29c..69f3626467 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -784,13 +784,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
 	for (size_t i = 0; i < oids->nr; i++) {
 		struct object_info oi = OBJECT_INFO_INIT;
 		unsigned long inflated;
+		size_t inflated_st = 0;
 		struct commit *commit;
 		struct object *obj;
 		void *content;
 		off_t disk;
 		int eaten;
 
-		oi.sizep = &inflated;
+		oi.sizep = &inflated_st;
 		oi.disk_sizep = &disk;
 		oi.contentp = &content;
 
@@ -798,6 +799,7 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
 						  OBJECT_INFO_SKIP_FETCH_OBJECT |
 						  OBJECT_INFO_QUICK) < 0)
 			continue;
+		inflated = cast_size_t_to_ulong(inflated_st);
 
 		obj = parse_object_buffer(the_repository, &oids->oid[i], type,
 					  inflated, content, &eaten);
diff --git a/builtin/tag.c b/builtin/tag.c
index d51c2e3349..06c125b53c 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -238,7 +238,7 @@ static int git_tag_config(const char *var, const char *value,
 
 static void write_tag_body(int fd, const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf, *sp, *orig;
 	struct strbuf payload = STRBUF_INIT;
@@ -388,7 +388,7 @@ static void create_reflog_msg(const struct object_id *oid, struct strbuf *sb)
 	enum object_type type;
 	struct commit *c;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int subject_len = 0;
 	const char *subject_start;
 
diff --git a/builtin/unpack-file.c b/builtin/unpack-file.c
index 87877a9fab..387389ed49 100644
--- a/builtin/unpack-file.c
+++ b/builtin/unpack-file.c
@@ -12,7 +12,7 @@ static char *create_temp_file(struct object_id *oid)
 	static char path[50];
 	void *buf;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	int fd;
 
 	buf = odb_read_object(the_repository->objects, oid, &type, &size);
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index e7a50c493c..f3849bb654 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -231,7 +231,7 @@ static int check_object(struct object *obj, enum object_type type,
 		die("object type mismatch");
 
 	if (!(obj->flags & FLAG_OPEN)) {
-		unsigned long size;
+		size_t size;
 		int type = odb_read_object_info(the_repository->objects, &obj->oid, &size);
 		if (type != obj->type || type <= 0)
 			die("object of unexpected type");
@@ -436,6 +436,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 {
 	void *delta_data, *base;
 	unsigned long base_size;
+	size_t base_size_st = 0;
 	struct object_id base_oid;
 
 	if (type == OBJ_REF_DELTA) {
@@ -512,7 +513,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		return;
 
 	base = odb_read_object(the_repository->objects, &base_oid,
-			       &type, &base_size);
+			       &type, &base_size_st);
+	base_size = cast_size_t_to_ulong(base_size_st);
 	if (!base) {
 		error("failed to read delta-pack base object %s",
 		      oid_to_hex(&base_oid));
diff --git a/bundle.c b/bundle.c
index 42327f9739..fd2db2c837 100644
--- a/bundle.c
+++ b/bundle.c
@@ -296,7 +296,7 @@ int list_bundle_refs(struct bundle_header *header, int argc, const char **argv)
 
 static int is_tag_in_date_range(struct object *tag, struct rev_info *revs)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = NULL, *line, *lineend;
 	timestamp_t date;
diff --git a/combine-diff.c b/combine-diff.c
index b799862068..3ce71db8bb 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -325,7 +325,9 @@ static char *grab_blob(struct repository *r,
 		*size = fill_textconv(r, textconv, df, &blob);
 		free_filespec(df);
 	} else {
-		blob = odb_read_object(r->objects, oid, &type, size);
+		size_t size_st = 0;
+		blob = odb_read_object(r->objects, oid, &type, &size_st);
+		*size = cast_size_t_to_ulong(size_st);
 		if (!blob)
 			die(_("unable to read %s"), oid_to_hex(oid));
 		if (type != OBJ_BLOB)
diff --git a/commit.c b/commit.c
index fd8723502e..7950effc58 100644
--- a/commit.c
+++ b/commit.c
@@ -395,7 +395,7 @@ const void *repo_get_commit_buffer(struct repository *r,
 	const void *ret = get_cached_commit_buffer(r, commit, sizep);
 	if (!ret) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		ret = odb_read_object(r->objects, &commit->object.oid, &type, &size);
 		if (!ret)
 			die("cannot read commit object %s",
@@ -404,7 +404,7 @@ const void *repo_get_commit_buffer(struct repository *r,
 			die("expected commit for %s, got %s",
 			    oid_to_hex(&commit->object.oid), type_name(type));
 		if (sizep)
-			*sizep = size;
+			*sizep = cast_size_t_to_ulong(size);
 	}
 	return ret;
 }
@@ -437,7 +437,7 @@ static inline void set_commit_tree(struct commit *c, struct tree *t)
 static void load_tree_from_commit_contents(struct repository *r, struct commit *commit)
 {
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	char *buf;
 	const char *p;
 	struct object_id tree_oid;
@@ -604,7 +604,7 @@ int repo_parse_commit_internal(struct repository *r,
 {
 	enum object_type type;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 	struct object_info oi = {
 		.typep = &type,
 		.sizep = &size,
@@ -1313,7 +1313,7 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h
 	struct merge_remote_desc *desc;
 	struct commit_extra_header *mergetag;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	struct strbuf payload = STRBUF_INIT;
 	struct strbuf signature = STRBUF_INIT;
diff --git a/config.c b/config.c
index a1b92fe083..21b231052c 100644
--- a/config.c
+++ b/config.c
@@ -1442,7 +1442,7 @@ int git_config_from_blob_oid(config_fn_t fn,
 {
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	buf = odb_read_object(repo->objects, oid, &type, &size);
diff --git a/diff.c b/diff.c
index 5a584fa1d5..816b89dc6c 100644
--- a/diff.c
+++ b/diff.c
@@ -4594,8 +4594,9 @@ int diff_populate_filespec(struct repository *r,
 		}
 	}
 	else {
+		size_t size_st = 0;
 		struct object_info info = {
-			.sizep = &s->size
+			.sizep = &size_st
 		};
 
 		if (!(size_only || check_binary))
@@ -4617,6 +4618,7 @@ int diff_populate_filespec(struct repository *r,
 			die("unable to read %s", oid_to_hex(&s->oid));
 
 object_read:
+		s->size = cast_size_t_to_ulong(size_st);
 		if (size_only || check_binary) {
 			if (size_only)
 				return 0;
@@ -4631,6 +4633,7 @@ object_read:
 			if (odb_read_object_info_extended(r->objects, &s->oid, &info,
 							  OBJECT_INFO_LOOKUP_REPLACE))
 				die("unable to read %s", oid_to_hex(&s->oid));
+			s->size = cast_size_t_to_ulong(size_st);
 		}
 		s->should_free = 1;
 	}
diff --git a/dir.c b/dir.c
index 33c81c256e..b6764d98a7 100644
--- a/dir.c
+++ b/dir.c
@@ -324,7 +324,7 @@ static int do_read_blob(const struct object_id *oid, struct oid_stat *oid_stat,
 			size_t *size_out, char **data_out)
 {
 	enum object_type type;
-	unsigned long sz;
+	size_t sz;
 	char *data;
 
 	*size_out = 0;
diff --git a/entry.c b/entry.c
index 7817aee362..c444fe5a10 100644
--- a/entry.c
+++ b/entry.c
@@ -92,11 +92,9 @@ static int create_file(const char *path, unsigned int mode)
 void *read_blob_entry(const struct cache_entry *ce, size_t *size)
 {
 	enum object_type type;
-	unsigned long ul;
 	void *blob_data = odb_read_object(the_repository->objects, &ce->oid,
-					  &type, &ul);
+					  &type, size);
 
-	*size = ul;
 	if (blob_data) {
 		if (type == OBJ_BLOB)
 			return blob_data;
diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c
index 45d8b20e97..14441f23ae 100644
--- a/fmt-merge-msg.c
+++ b/fmt-merge-msg.c
@@ -528,11 +528,11 @@ static void fmt_merge_msg_sigs(struct strbuf *out)
 	for (i = 0; i < origins.nr; i++) {
 		struct object_id *oid = origins.items[i].util;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object(the_repository->objects, oid,
 					    &type, &size);
 		char *origbuf = buf;
-		unsigned long len = size;
+		size_t len = size;
 		struct signature_check sigc = { NULL };
 		struct strbuf payload = STRBUF_INIT, sig = STRBUF_INIT;
 
diff --git a/fsck.c b/fsck.c
index b72200c352..82c2002f4a 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1328,7 +1328,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
 	oidset_iter_init(blobs_found, &iter);
 	while ((oid = oidset_iter_next(&iter))) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf;
 
 		if (oidset_contains(blobs_done, oid))
diff --git a/grep.c b/grep.c
index a54e5d86a9..1d75d31421 100644
--- a/grep.c
+++ b/grep.c
@@ -1931,9 +1931,11 @@ void grep_source_clear_data(struct grep_source *gs)
 static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
+	size_t size_st = 0;
 
 	gs->buf = odb_read_object(gs->repo->objects, gs->identifier,
-				  &type, &gs->size);
+				  &type, &size_st);
+	gs->size = cast_size_t_to_ulong(size_st);
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,
diff --git a/http-push.c b/http-push.c
index 520d6c3b6a..c61d9f7e02 100644
--- a/http-push.c
+++ b/http-push.c
@@ -365,7 +365,7 @@ static void start_put(struct transfer_request *request)
 	enum object_type type;
 	char hdr[50];
 	void *unpacked;
-	unsigned long len;
+	size_t len;
 	int hdrlen;
 	ssize_t size;
 	git_zstream stream;
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 78316e7f90..c912ff3079 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -280,7 +280,7 @@ static enum list_objects_filter_result filter_blobs_limit(
 	void *filter_data_)
 {
 	struct filter_blobs_limit_data *filter_data = filter_data_;
-	unsigned long object_length;
+	size_t object_length;
 	enum object_type t;
 
 	switch (filter_situation) {
diff --git a/mailmap.c b/mailmap.c
index 3b2691781d..72b639e602 100644
--- a/mailmap.c
+++ b/mailmap.c
@@ -186,7 +186,7 @@ int read_mailmap_blob(struct repository *repo, struct string_list *map,
 {
 	struct object_id oid;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	if (!name)
diff --git a/match-trees.c b/match-trees.c
index 4216933d06..2a43c0fa1a 100644
--- a/match-trees.c
+++ b/match-trees.c
@@ -61,7 +61,7 @@ static void *fill_tree_desc_strict(struct repository *r,
 {
 	void *buffer;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 
 	buffer = odb_read_object(r->objects, hash, &type, &size);
 	if (!buffer)
@@ -186,7 +186,7 @@ static int splice_tree(struct repository *r,
 	char *subpath;
 	int toplen;
 	char *buf;
-	unsigned long sz;
+	size_t sz;
 	struct tree_desc desc;
 	unsigned char *rewrite_here;
 	const struct object_id *rewrite_with;
diff --git a/merge-blobs.c b/merge-blobs.c
index 6fc2799417..16a75bd1e3 100644
--- a/merge-blobs.c
+++ b/merge-blobs.c
@@ -9,7 +9,7 @@
 static int fill_mmfile_blob(mmfile_t *f, struct blob *obj)
 {
 	void *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	buf = odb_read_object(the_repository->objects, &obj->object.oid,
@@ -35,7 +35,7 @@ static void *three_way_filemerge(struct index_state *istate,
 				 mmfile_t *base,
 				 mmfile_t *our,
 				 mmfile_t *their,
-				 unsigned long *size)
+				 size_t *size)
 {
 	enum ll_merge_result merge_status;
 	mmbuffer_t res;
@@ -61,7 +61,7 @@ static void *three_way_filemerge(struct index_state *istate,
 
 void *merge_blobs(struct index_state *istate, const char *path,
 		  struct blob *base, struct blob *our,
-		  struct blob *their, unsigned long *size)
+		  struct blob *their, size_t *size)
 {
 	void *res = NULL;
 	mmfile_t f1, f2, common;
diff --git a/merge-blobs.h b/merge-blobs.h
index 13cf9669e5..5797517a06 100644
--- a/merge-blobs.h
+++ b/merge-blobs.h
@@ -6,6 +6,6 @@ struct index_state;
 
 void *merge_blobs(struct index_state *, const char *,
 		  struct blob *, struct blob *,
-		  struct blob *, unsigned long *);
+		  struct blob *, size_t *);
 
 #endif /* MERGE_BLOBS_H */
diff --git a/merge-ort.c b/merge-ort.c
index 544be9e466..4f6273bd51 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -3716,7 +3716,7 @@ static int read_oid_strbuf(struct merge_options *opt,
 {
 	void *buf;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	buf = odb_read_object(opt->repo->objects, oid, &type, &size);
 	if (!buf) {
 		path_msg(opt, ERROR_OBJECT_READ_FAILED, 0,
diff --git a/notes-cache.c b/notes-cache.c
index bf5bb1f6c1..74cef802bd 100644
--- a/notes-cache.c
+++ b/notes-cache.c
@@ -82,7 +82,7 @@ char *notes_cache_get(struct notes_cache *c, struct object_id *key_oid,
 	const struct object_id *value_oid;
 	enum object_type type;
 	char *value;
-	unsigned long size;
+	size_t size;
 
 	value_oid = get_note(&c->tree, key_oid);
 	if (!value_oid)
diff --git a/notes-merge.c b/notes-merge.c
index b9322abbcb..118cad2518 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -339,7 +339,7 @@ static void write_note_to_worktree(const struct object_id *obj,
 				   const struct object_id *note)
 {
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	void *buf = odb_read_object(the_repository->objects, note, &type, &size);
 
 	if (!buf)
diff --git a/notes.c b/notes.c
index 8f315e2a00..ec9c2cb150 100644
--- a/notes.c
+++ b/notes.c
@@ -811,7 +811,8 @@ int combine_notes_concatenate(struct object_id *cur_oid,
 			      const struct object_id *new_oid)
 {
 	char *cur_msg = NULL, *new_msg = NULL, *buf;
-	unsigned long cur_len, new_len, buf_len;
+	unsigned long buf_len;
+	size_t cur_len, new_len;
 	enum object_type cur_type, new_type;
 	int ret;
 
@@ -875,7 +876,7 @@ static int string_list_add_note_lines(struct string_list *list,
 				      const struct object_id *oid)
 {
 	char *data;
-	unsigned long len;
+	size_t len;
 	enum object_type t;
 
 	if (is_null_oid(oid))
@@ -1282,7 +1283,8 @@ static void format_note(struct notes_tree *t, const struct object_id *object_oid
 	static const char utf8[] = "utf-8";
 	const struct object_id *oid;
 	char *msg, *msg_p;
-	unsigned long linelen, msglen;
+	unsigned long linelen;
+	size_t msglen;
 	enum object_type type;
 
 	if (!t)
diff --git a/object-file.c b/object-file.c
index 90f995d000..a81d50c305 100644
--- a/object-file.c
+++ b/object-file.c
@@ -381,7 +381,7 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
 	}
 
 	if (oi->sizep)
-		*oi->sizep = cast_size_t_to_ulong(size);
+		*oi->sizep = size;
 
 	/*
 	 * The length must be followed by a zero byte
@@ -409,7 +409,7 @@ static int read_object_info_from_path(struct odb_source *source,
 	void *map = NULL;
 	git_zstream stream, *stream_to_end = NULL;
 	char hdr[MAX_HEADER_LEN];
-	unsigned long size_scratch;
+	size_t size_scratch;
 	enum object_type type_scratch;
 	struct stat st;
 
@@ -1222,7 +1222,7 @@ int force_object_loose(struct odb_source *source,
 {
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	void *buf;
-	unsigned long len;
+	size_t len;
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct object_id compat_oid;
 	enum object_type type;
@@ -2126,7 +2126,7 @@ int read_loose_object(struct repository *repo,
 	unsigned long mapsize;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
-	unsigned long *size = oi->sizep;
+	size_t *size = oi->sizep;
 
 	fd = git_open(path);
 	if (fd >= 0)
@@ -2302,7 +2302,6 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct odb_loose_read_stream *st;
 	unsigned long mapsize;
-	unsigned long size_ul;
 	void *mapped;
 
 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
@@ -2326,18 +2325,11 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 		goto error;
 	}
 
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * st->base.size is size_t (64-bit). Use temporary variable.
-	 * Note: loose objects >4GB would still truncate here, but such
-	 * large loose objects are uncommon (they'd normally be packed).
-	 */
-	oi.sizep = &size_ul;
+	oi.sizep = &st->base.size;
 	oi.typep = &st->base.type;
 
 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
 		goto error;
-	st->base.size = size_ul;
 
 	st->mapped = mapped;
 	st->mapsize = mapsize;
diff --git a/object.c b/object.c
index 465902ecc6..23b84aa7e2 100644
--- a/object.c
+++ b/object.c
@@ -325,7 +325,7 @@ struct object *parse_object_with_flags(struct repository *r,
 {
 	int skip_hash = !!(flags & PARSE_OBJECT_SKIP_HASH_CHECK);
 	int discard_tree = !!(flags & PARSE_OBJECT_DISCARD_TREE);
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	int eaten;
 	const struct object_id *repl = lookup_replace_object(r, oid);
diff --git a/odb.c b/odb.c
index 965ef68e4e..7d555be09f 100644
--- a/odb.c
+++ b/odb.c
@@ -625,7 +625,7 @@ static int oid_object_info_convert(struct repository *r,
 	enum object_type type;
 	struct object_id oid, delta_base_oid;
 	struct object_info new_oi, *oi;
-	unsigned long size;
+	size_t size;
 	void *content;
 	int ret;
 
@@ -716,7 +716,7 @@ int odb_read_object_info_extended(struct object_database *odb,
 /* returns enum object_type or negative */
 int odb_read_object_info(struct object_database *odb,
 			 const struct object_id *oid,
-			 unsigned long *sizep)
+			 size_t *sizep)
 {
 	enum object_type type;
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -730,7 +730,7 @@ int odb_read_object_info(struct object_database *odb,
 }
 
 int odb_pretend_object(struct object_database *odb,
-		       void *buf, unsigned long len, enum object_type type,
+		       void *buf, size_t len, enum object_type type,
 		       struct object_id *oid)
 {
 	hash_object_file(odb->repo->hash_algo, buf, len, type, oid);
@@ -744,7 +744,7 @@ int odb_pretend_object(struct object_database *odb,
 void *odb_read_object(struct object_database *odb,
 		      const struct object_id *oid,
 		      enum object_type *type,
-		      unsigned long *size)
+		      size_t *size)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	unsigned flags = OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE;
@@ -762,12 +762,12 @@ void *odb_read_object(struct object_database *odb,
 void *odb_read_object_peeled(struct object_database *odb,
 			     const struct object_id *oid,
 			     enum object_type required_type,
-			     unsigned long *size,
+			     size_t *size,
 			     struct object_id *actual_oid_return)
 {
 	enum object_type type;
 	void *buffer;
-	unsigned long isize;
+	size_t isize;
 	struct object_id actual_oid;
 
 	oidcpy(&actual_oid, oid);
diff --git a/odb.h b/odb.h
index 73553ed5a7..e2f0bbad25 100644
--- a/odb.h
+++ b/odb.h
@@ -228,12 +228,12 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb,
 void *odb_read_object(struct object_database *odb,
 		      const struct object_id *oid,
 		      enum object_type *type,
-		      unsigned long *size);
+		      size_t *size);
 
 void *odb_read_object_peeled(struct object_database *odb,
 			     const struct object_id *oid,
 			     enum object_type required_type,
-			     unsigned long *size,
+			     size_t *size,
 			     struct object_id *oid_ret);
 
 /*
@@ -245,13 +245,13 @@ void *odb_read_object_peeled(struct object_database *odb,
  * that reference it.
  */
 int odb_pretend_object(struct object_database *odb,
-		       void *buf, unsigned long len, enum object_type type,
+		       void *buf, size_t len, enum object_type type,
 		       struct object_id *oid);
 
 struct object_info {
 	/* Request */
 	enum object_type *typep;
-	unsigned long *sizep;
+	size_t *sizep;
 	off_t *disk_sizep;
 	struct object_id *delta_base_oid;
 	void **contentp;
@@ -356,7 +356,7 @@ int odb_read_object_info_extended(struct object_database *odb,
  */
 int odb_read_object_info(struct object_database *odb,
 			 const struct object_id *oid,
-			 unsigned long *sizep);
+			 size_t *sizep);
 
 enum odb_has_object_flags {
 	/* Retry packed storage after checking packed and loose storage */
diff --git a/odb/streaming.c b/odb/streaming.c
index 7602a8d5d8..20531e864c 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -157,26 +157,15 @@ static int open_istream_incore(struct odb_read_stream **out,
 		.base.read = read_istream_incore,
 	};
 	struct odb_incore_read_stream *st;
-	unsigned long size_ul;
 	int ret;
 
 	oi.typep = &stream.base.type;
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * stream.base.size is size_t (64-bit). We use a temporary variable
-	 * because the types are incompatible. Note: this path still truncates
-	 * for >4GB objects, but large objects should use pack streaming
-	 * (packfile_store_read_object_stream) which handles size_t properly.
-	 * This incore fallback is only used for small objects or when pack
-	 * streaming is unavailable.
-	 */
-	oi.sizep = &size_ul;
+	oi.sizep = &stream.base.size;
 	oi.contentp = (void **)&stream.buf;
 	ret = odb_read_object_info_extended(odb, oid, &oi,
 					    OBJECT_INFO_DIE_IF_CORRUPT);
 	if (ret)
 		return ret;
-	stream.base.size = size_ul;
 
 	CALLOC_ARRAY(st, 1);
 	*st = stream;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index f9af8a96bd..e8a82945cc 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1856,7 +1856,7 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
 static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 				     uint32_t pos)
 {
-	unsigned long size;
+	size_t size;
 	struct object_info oi = OBJECT_INFO_INIT;
 
 	oi.sizep = &size;
@@ -1891,7 +1891,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 			die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
 	}
 
-	return size;
+	return cast_size_t_to_ulong(size);
 }
 
 static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
diff --git a/packfile.c b/packfile.c
index c174982d10..78c389e6f3 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1607,13 +1607,10 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 	 * a "real" type later if the caller is interested.
 	 */
 	if (oi->contentp) {
-		size_t size_st = 0;
 		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
-						      &size_st, &type);
+						      oi->sizep, &type);
 		if (!*oi->contentp)
 			type = OBJ_BAD;
-		else if (oi->sizep)
-			*oi->sizep = cast_size_t_to_ulong(size_st);
 	} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
 		type = unpack_object_header(p, &w_curs, &curpos, &size);
 	}
@@ -1633,7 +1630,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 				goto out;
 			}
 		}
-		*oi->sizep = (unsigned long)size;
+		*oi->sizep = size;
 	}
 
 	if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
@@ -1919,7 +1916,6 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			struct object_id base_oid;
 			if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
 				struct object_info oi = OBJECT_INFO_INIT;
-				unsigned long bsz_ul = 0;
 
 				nth_packed_object_id(&base_oid, p,
 						     pack_pos_to_index(p, pos));
@@ -1930,13 +1926,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 				mark_bad_packed_object(p, &base_oid);
 
 				oi.typep = &type;
-				oi.sizep = &bsz_ul;
+				oi.sizep = &base_size;
 				oi.contentp = &base;
 				if (odb_read_object_info_extended(r->objects, &base_oid,
 								  &oi, 0) < 0)
 					base = NULL;
-				else
-					base_size = bsz_ul;
 
 				external_base = base;
 			}
diff --git a/path-walk.c b/path-walk.c
index 94ff90bd15..edc8e736d7 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -368,7 +368,7 @@ static int walk_path(struct path_walk_context *ctx,
 		struct oid_array filtered = OID_ARRAY_INIT;
 
 		for (size_t i = 0; i < list->oids.nr; i++) {
-			unsigned long size;
+			size_t size;
 
 			if (odb_read_object_info(ctx->repo->objects,
 						 &list->oids.oid[i],
diff --git a/protocol-caps.c b/protocol-caps.c
index 35072ed60b..8858ea4489 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -50,7 +50,7 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 	for_each_string_list_item (item, oid_str_list) {
 		const char *oid_str = item->string;
 		struct object_id oid;
-		unsigned long object_size;
+		size_t object_size;
 
 		if (get_oid_hex_algop(oid_str, &oid, r->hash_algo) < 0) {
 			packet_writer_error(
@@ -66,7 +66,8 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 			if (odb_read_object_info(r->objects, &oid, &object_size) < 0) {
 				strbuf_addstr(&send_buffer, " ");
 			} else {
-				strbuf_addf(&send_buffer, " %lu", object_size);
+				strbuf_addf(&send_buffer, " %"PRIuMAX,
+					    (uintmax_t)object_size);
 			}
 		}
 
diff --git a/read-cache.c b/read-cache.c
index 21829102ae..21ca58beea 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -250,7 +250,7 @@ static int ce_compare_link(const struct cache_entry *ce, size_t expected_size)
 {
 	int match = -1;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	struct strbuf sb = STRBUF_INIT;
 
@@ -3462,7 +3462,7 @@ void *read_blob_data_from_index(struct index_state *istate,
 				const char *path, unsigned long *size)
 {
 	int pos, len;
-	unsigned long sz;
+	size_t sz;
 	enum object_type type;
 	void *data;
 
@@ -3490,7 +3490,7 @@ void *read_blob_data_from_index(struct index_state *istate,
 		return NULL;
 	}
 	if (size)
-		*size = sz;
+		*size = cast_size_t_to_ulong(sz);
 	return data;
 }
 
diff --git a/ref-filter.c b/ref-filter.c
index 1da4c0e60d..8ba91c72a1 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -86,7 +86,7 @@ struct ref_trailer_buf {
 static struct expand_data {
 	struct object_id oid;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	off_t disk_size;
 	struct object_id delta_base_oid;
 	void *content;
diff --git a/reflog.c b/reflog.c
index 82337078d0..04edbe5670 100644
--- a/reflog.c
+++ b/reflog.c
@@ -154,7 +154,7 @@ static int tree_is_complete(const struct object_id *oid)
 
 	if (!tree->buffer) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		void *data = odb_read_object(the_repository->objects, oid,
 					     &type, &size);
 		if (!data) {
diff --git a/rerere.c b/rerere.c
index 0296700f9f..068321b24f 100644
--- a/rerere.c
+++ b/rerere.c
@@ -990,7 +990,7 @@ static int handle_cache(struct index_state *istate,
 
 	while (pos < istate->cache_nr) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 
 		ce = istate->cache[pos++];
 		if (ce_namelen(ce) != len || memcmp(ce->name, path, len))
diff --git a/submodule-config.c b/submodule-config.c
index a81897b4e0..f75997402a 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -694,7 +694,7 @@ static const struct submodule *config_from(struct submodule_cache *cache,
 		enum lookup_type lookup_type)
 {
 	struct strbuf rev = STRBUF_INIT;
-	unsigned long config_size;
+	size_t config_size;
 	char *config = NULL;
 	struct object_id oid;
 	enum object_type type;
diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c
index c493b75e02..840797cf0d 100644
--- a/t/helper/test-pack-deltas.c
+++ b/t/helper/test-pack-deltas.c
@@ -48,7 +48,8 @@ static void write_ref_delta(struct hashfile *f,
 			    struct object_id *base)
 {
 	unsigned char header[MAX_PACK_OBJECT_HEADER];
-	unsigned long size, base_size, delta_size, compressed_size, hdrlen;
+	unsigned long delta_size, compressed_size, hdrlen;
+	size_t size, base_size;
 	enum object_type type;
 	void *base_buf, *delta_buf;
 	void *buf = odb_read_object(the_repository->objects,
diff --git a/t/helper/test-partial-clone.c b/t/helper/test-partial-clone.c
index a7aab426d0..87c59108e0 100644
--- a/t/helper/test-partial-clone.c
+++ b/t/helper/test-partial-clone.c
@@ -17,7 +17,7 @@ static void object_info(const char *gitdir, const char *oid_hex)
 {
 	struct repository r;
 	struct object_id oid;
-	unsigned long size;
+	size_t size;
 	struct object_info oi = {.sizep = &size};
 	const char *p;
 
diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c
index 482502ef4b..6844bfc37c 100644
--- a/t/unit-tests/u-odb-inmemory.c
+++ b/t/unit-tests/u-odb-inmemory.c
@@ -20,7 +20,7 @@ static void cl_assert_object_info(struct odb_source_inmemory *source,
 				  const char *expected_content)
 {
 	enum object_type actual_type;
-	unsigned long actual_size;
+	size_t actual_size;
 	void *actual_content;
 	struct object_info oi = {
 		.typep = &actual_type,
diff --git a/tag.c b/tag.c
index 2f12e51024..1a00ded6eb 100644
--- a/tag.c
+++ b/tag.c
@@ -49,7 +49,7 @@ int gpg_verify_tag(struct repository *r, const struct object_id *oid,
 {
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	type = odb_read_object_info(r->objects, oid, NULL);
@@ -207,7 +207,7 @@ int parse_tag(struct repository *r, struct tag *item)
 {
 	enum object_type type;
 	void *data;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	if (item->object.parsed)
diff --git a/tree-walk.c b/tree-walk.c
index 7e1b956f27..a67f06b9eb 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -87,7 +87,7 @@ void *fill_tree_descriptor(struct repository *r,
 			   struct tree_desc *desc,
 			   const struct object_id *oid)
 {
-	unsigned long size = 0;
+	size_t size = 0;
 	void *buf = NULL;
 
 	if (oid) {
@@ -610,7 +610,7 @@ int get_tree_entry(struct repository *r,
 {
 	int retval;
 	void *tree;
-	unsigned long size;
+	size_t size;
 	struct object_id root;
 
 	tree = odb_read_object_peeled(r->objects, tree_oid, OBJ_TREE, &size, &root);
@@ -682,7 +682,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 		if (!t.buffer) {
 			void *tree;
 			struct object_id root;
-			unsigned long size;
+			size_t size;
 			tree = odb_read_object_peeled(r->objects, &current_tree_oid,
 						      OBJ_TREE, &size, &root);
 			if (!tree)
@@ -778,6 +778,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 		} else if (S_ISLNK(*mode)) {
 			/* Follow a symlink */
 			unsigned long link_len;
+			size_t link_len_st = 0;
 			size_t len;
 			char *contents, *contents_start;
 			struct dir_state *parent;
@@ -797,7 +798,8 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 
 			contents = odb_read_object(r->objects,
 						   &current_tree_oid, &type,
-						   &link_len);
+						   &link_len_st);
+			link_len = cast_size_t_to_ulong(link_len_st);
 
 			if (!contents)
 				goto done;
diff --git a/tree.c b/tree.c
index d703ab97c8..53f7395e9f 100644
--- a/tree.c
+++ b/tree.c
@@ -188,7 +188,7 @@ int repo_parse_tree_gently(struct repository *r, struct tree *item,
 {
 	 enum object_type type;
 	 void *buffer;
-	 unsigned long size;
+	 size_t size;
 
 	if (item->object.parsed)
 		return 0;
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 5ee2b96d0a..db6938689f 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -179,7 +179,7 @@ int read_mmfile(mmfile_t *ptr, const char *filename)
 void read_mmblob(mmfile_t *ptr, struct object_database *odb,
 		 const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	if (is_null_oid(oid)) {
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-06-04 10:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.