* [PATCH 0/7] More work supporting objects larger than 4GB on Windows
@ 2026-06-04 10:51 Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
` (6 more replies)
0 siblings, 7 replies; 13+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
To: git; +Cc: Kristofer Karlsson, Johannes Schindelin
This patch series tries to address the problems pointed out by the expensive
tests that now run in CI: t5608 and t7508 verify various aspects about
objects larger than 4GB, which Git does not currently handle correctly when
run on a platform where size_t is 64-bit and unsigned long is 32-bit.
Unfortunately, this conflicts heavily with ps/odb-source-loose. I rebased
the branch onto seen and pushed the result to
https://github.com/dscho/git/tree/refs/heads/objects-larger-than-4gb-on-windows-pt2-seen,
to make it easier to resolve merge conflicts. Here is the relevant
range-diff:
1: f3aeae983a ! 1: 62adeb9818 odb: use size_t for object_info.sizep and the size APIs
@@ builtin/log.c: static int show_blob_object(const struct object_id *oid, struct r
## builtin/ls-files.c ##
@@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struct strbuf *line,
- const enum object_type type, unsigned int padded)
- {
+ size_t len;
+
if (type == OBJ_BLOB) {
- unsigned long size;
+ size_t size;
@@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struc
## builtin/ls-tree.c ##
@@ builtin/ls-tree.c: static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
- const enum object_type type, unsigned int padded)
- {
+ size_t len;
+
if (type == OBJ_BLOB) {
- unsigned long size;
+ size_t size;
@@ notes.c: static void format_note(struct notes_tree *t, const struct object_id *o
if (!t)
## object-file.c ##
-@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info *oi)
+@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
}
if (oi->sizep)
@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info
/*
* The length must be followed by a zero byte
-@@ object-file.c: static int read_object_info_from_path(struct odb_source *source,
- void *map = NULL;
- git_zstream stream, *stream_to_end = NULL;
- char hdr[MAX_HEADER_LEN];
-- unsigned long size_scratch;
-+ size_t size_scratch;
- enum object_type type_scratch;
- struct stat st;
-
@@ object-file.c: int force_object_loose(struct odb_source *source,
- {
+ struct odb_source_files *files = odb_source_files_downcast(source);
const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
void *buf;
- unsigned long len;
@@ object-file.c: int read_loose_object(struct repository *repo,
fd = git_open(path);
if (fd >= 0)
-@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
- struct object_info oi = OBJECT_INFO_INIT;
- struct odb_loose_read_stream *st;
- unsigned long mapsize;
-- unsigned long size_ul;
- void *mapped;
-
- mapped = odb_source_loose_map_object(source, oid, &mapsize);
-@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
- goto error;
- }
-
-- /*
-- * object_info.sizep is unsigned long* (32-bit on Windows), but
-- * st->base.size is size_t (64-bit). Use temporary variable.
-- * Note: loose objects >4GB would still truncate here, but such
-- * large loose objects are uncommon (they'd normally be packed).
-- */
-- oi.sizep = &size_ul;
-+ oi.sizep = &st->base.size;
- oi.typep = &st->base.type;
-
- if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
- goto error;
-- st->base.size = size_ul;
-
- st->mapped = mapped;
- st->mapsize = mapsize;
## object.c ##
@@ object.c: struct object *parse_object_with_flags(struct repository *r,
@@ odb.h: int odb_read_object_info_extended(struct object_database *odb,
enum odb_has_object_flags {
/* Retry packed storage after checking packed and loose storage */
+ ## odb/source-loose.c ##
+@@ odb/source-loose.c: static int read_object_info_from_path(struct odb_source_loose *loose,
+ void *map = NULL;
+ git_zstream stream, *stream_to_end = NULL;
+ char hdr[MAX_HEADER_LEN];
+- unsigned long size_scratch;
++ size_t size_scratch;
+ enum object_type type_scratch;
+ struct stat st;
+
+@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+ struct object_info oi = OBJECT_INFO_INIT;
+ struct odb_loose_read_stream *st;
+ unsigned long mapsize;
+- unsigned long size_ul;
+ void *mapped;
+
+ mapped = odb_source_loose_map_object(loose, oid, &mapsize);
+@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+ goto error;
+ }
+
+- /*
+- * object_info.sizep is unsigned long* (32-bit on Windows), but
+- * st->base.size is size_t (64-bit). Use temporary variable.
+- * Note: loose objects >4GB would still truncate here, but such
+- * large loose objects are uncommon (they'd normally be packed).
+- */
+- oi.sizep = &size_ul;
++ oi.sizep = &st->base.size;
+ oi.typep = &st->base.type;
+
+ if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+ goto error;
+- st->base.size = size_ul;
+
+ st->mapped = mapped;
+ st->mapsize = mapsize;
+
## odb/streaming.c ##
@@ odb/streaming.c: static int open_istream_incore(struct odb_read_stream **out,
.base.read = read_istream_incore,
Johannes Schindelin (7):
compat/msvc: use _chsize_s for ftruncate
patch-delta: use size_t for sizes
pack-objects(check_pack_inflate()): use size_t instead of unsigned
long
packfile: widen unpack_entry()'s size out-parameter to size_t
pack-objects: use size_t for in-core object sizes
packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
odb: use size_t for object_info.sizep and the size APIs
apply.c | 8 ++--
archive.c | 4 +-
attr.c | 2 +-
bisect.c | 2 +-
blame.c | 15 +++++--
builtin/cat-file.c | 39 ++++++++++++-------
builtin/difftool.c | 2 +-
builtin/fast-export.c | 7 +++-
builtin/fast-import.c | 29 ++++++++++----
builtin/fsck.c | 2 +-
builtin/grep.c | 12 +++---
builtin/index-pack.c | 10 ++---
builtin/log.c | 2 +-
builtin/ls-files.c | 2 +-
builtin/ls-tree.c | 4 +-
builtin/merge-tree.c | 6 +--
builtin/mktag.c | 2 +-
builtin/notes.c | 6 +--
builtin/pack-objects.c | 73 +++++++++++++++++++++--------------
builtin/repo.c | 4 +-
builtin/tag.c | 4 +-
builtin/unpack-file.c | 2 +-
builtin/unpack-objects.c | 8 ++--
bundle.c | 2 +-
combine-diff.c | 4 +-
commit.c | 10 ++---
compat/msvc-posix.h | 24 +++++++++++-
config.c | 2 +-
delta.h | 20 +++-------
diff.c | 5 ++-
dir.c | 2 +-
entry.c | 4 +-
fmt-merge-msg.c | 4 +-
fsck.c | 2 +-
grep.c | 4 +-
http-push.c | 2 +-
list-objects-filter.c | 2 +-
mailmap.c | 2 +-
match-trees.c | 4 +-
merge-blobs.c | 6 +--
merge-blobs.h | 2 +-
merge-ort.c | 2 +-
notes-cache.c | 2 +-
notes-merge.c | 2 +-
notes.c | 8 ++--
object-file.c | 18 +++------
object.c | 2 +-
odb.c | 12 +++---
odb.h | 10 ++---
odb/streaming.c | 13 +------
pack-bitmap.c | 4 +-
pack-check.c | 5 +--
pack-objects.h | 2 +-
packfile.c | 54 ++++++++++----------------
packfile.h | 5 ++-
patch-delta.c | 8 ++--
path-walk.c | 2 +-
protocol-caps.c | 5 ++-
read-cache.c | 6 +--
ref-filter.c | 2 +-
reflog.c | 2 +-
rerere.c | 2 +-
submodule-config.c | 2 +-
t/helper/test-delta.c | 10 +++--
t/helper/test-pack-deltas.c | 3 +-
t/helper/test-partial-clone.c | 2 +-
t/unit-tests/u-odb-inmemory.c | 2 +-
tag.c | 4 +-
tree-walk.c | 10 +++--
tree.c | 2 +-
xdiff-interface.c | 2 +-
71 files changed, 296 insertions(+), 253 deletions(-)
base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2137%2Fdscho%2Fobjects-larger-than-4gb-on-windows-pt2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2137/dscho/objects-larger-than-4gb-on-windows-pt2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2137
--
gitgitgadget
^ permalink raw reply [flat|nested] 13+ messages in thread* [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget ` (5 subsequent siblings) 6 siblings, 0 replies; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> On Windows, `unsigned long` and `long` are 32 bits even on 64-bit builds. The MSVC compatibility header has shimmed `ftruncate()` with #define ftruncate _chsize ever since `compat/msvc-posix.h` was introduced. `_chsize()` takes a 32-bit `long` for the new length, which silently truncates files (and the requested size) to 2 GiB. That is enough to make t7508 test 126 "git add fails gracefully with 4 GiB and 8 GiB files" fail under MSVC: `test-tool truncate` creates a sparse 4 GiB or 8 GiB file via the shimmed `ftruncate()`, and the test never gets off the ground. `_chsize_s()` is the modern replacement, accepts a 64-bit `__int64` length, and is the only sensible target on Windows. The catch is that it does not follow the POSIX `-1` + `errno` convention: it returns `0` on success and an errno value (a small positive integer) on failure. A plain `#define ftruncate _chsize_s` would therefore silently break callers that test the return value as `< 0` or against `-1`, of which there are several: `http.c`, `parallel-checkout.c`, and `t/helper/test-truncate.c` among them. Introduce a `static inline` wrapper that calls `_chsize_s()`, copies its errno return into `errno`, and translates the result to the familiar `-1` / `0` convention, then point `ftruncate` at the wrapper. 
Place the wrapper after `#include "mingw-posix.h"` so the `off_t` parameter resolves to the already-widened `off64_t` rather than the 32-bit `_off_t` from `compat/vcbuild/include/unistd.h`. MinGW is unaffected: its `ftruncate()` already takes `off_t` and routes through `ftruncate64()` when `_FILE_OFFSET_BITS=64`, which is the default in our build. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/msvc-posix.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/compat/msvc-posix.h b/compat/msvc-posix.h index c500b8b4aa..7ce39b8d3f 100644 --- a/compat/msvc-posix.h +++ b/compat/msvc-posix.h @@ -16,7 +16,6 @@ #define __attribute__(x) #define strcasecmp _stricmp #define strncasecmp _strnicmp -#define ftruncate _chsize #define strtoull _strtoui64 #define strtoll _strtoi64 @@ -30,4 +29,27 @@ typedef int sigset_t; #include "mingw-posix.h" +/* + * MSVC's `_chsize()` takes a 32-bit `long` and silently truncates files + * to 2 GiB. `_chsize_s()` accepts a 64-bit length but returns 0 on + * success or an errno value on failure, rather than the -1/errno + * convention POSIX `ftruncate()` callers expect. Wrap it so callers + * that test the return value as `< 0` or against `-1` keep working. + * + * Note: this declaration must follow `#include "mingw-posix.h"` so + * `off_t` resolves to `off64_t` and the parameter type matches the + * underlying `_chsize_s()` width. + */ +static inline int msvc_ftruncate(int fd, off_t length) +{ + int err = _chsize_s(fd, length); + + if (err) { + errno = err; + return -1; + } + return 0; +} +#define ftruncate msvc_ftruncate + #endif /* COMPAT_MSVC_POSIX_H */ -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 2/7] patch-delta: use size_t for sizes 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget ` (4 subsequent siblings) 6 siblings, 1 reply; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> `patch_delta()` takes the source and delta sizes by value and writes back the reconstructed target size through an `unsigned long *`. That data type cannot represent a value that exceeds 4 GiB on systems where `unsigned long` is 32-bit (notably 64-bit Windows builds), even though the delta encoding itself, the on-disk layout, and the in-memory buffers happily carry such sizes. A `size_t` companion to `get_delta_hdr_size()`, `get_delta_hdr_size_sz()`, was introduced in 17fa077596 (delta, packfile: use size_t for delta header sizes, 2026-05-08) precisely so that `patch_delta()` could be widened without changing the on-the-wire decoding helper's signature. Widen `patch_delta()`'s three size parameters to `size_t` and switch its internal use of `get_delta_hdr_size()` to the `_sz` variant. Then propagate the wider type through the callers.
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- apply.c | 2 +- builtin/index-pack.c | 4 ++-- builtin/unpack-objects.c | 2 +- delta.h | 6 +++--- packfile.c | 4 +--- patch-delta.c | 12 ++++++------ t/helper/test-delta.c | 10 ++++++---- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/apply.c b/apply.c index 249248d4f2..3cf544e9a9 100644 --- a/apply.c +++ b/apply.c @@ -3232,7 +3232,7 @@ static int apply_binary_fragment(struct apply_state *state, struct patch *patch) { struct fragment *fragment = patch->fragments; - unsigned long len; + size_t len; void *dst; if (!fragment) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index cf0bd8280d..3c4474e681 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -71,7 +71,7 @@ struct base_data { /* Not initialized by make_base(). */ struct list_head list; void *data; - unsigned long size; + size_t size; }; /* @@ -1048,7 +1048,7 @@ static struct base_data *resolve_delta(struct object_entry *delta_obj, { void *delta_data, *result_data; struct base_data *result; - unsigned long result_size; + size_t result_size; if (show_stat) { int i = delta_obj - objects; diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 59e9b8711e..e7a50c493c 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -314,7 +314,7 @@ static void resolve_delta(unsigned nr, enum object_type type, void *delta, unsigned long delta_size) { void *result; - unsigned long result_size; + size_t result_size; result = patch_delta(base, base_size, delta, delta_size, diff --git a/delta.h b/delta.h index fad68cfc45..bb149dc82b 100644 --- a/delta.h +++ b/delta.h @@ -75,9 +75,9 @@ diff_delta(const void *src_buf, unsigned long src_bufsize, * *trg_bufsize is updated with its size. On failure a NULL pointer is * returned. The returned buffer must be freed by the caller. 
*/ -void *patch_delta(const void *src_buf, unsigned long src_size, - const void *delta_buf, unsigned long delta_size, - unsigned long *dst_size); +void *patch_delta(const void *src_buf, size_t src_size, + const void *delta_buf, size_t delta_size, + size_t *dst_size); /* the smallest possible delta size is 4 bytes */ #define DELTA_SIZE_MIN 4 diff --git a/packfile.c b/packfile.c index 89366abfe3..e202f48837 100644 --- a/packfile.c +++ b/packfile.c @@ -1964,10 +1964,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, (uintmax_t)curpos, p->pack_name); data = NULL; } else { - unsigned long sz; data = patch_delta(base, base_size, delta_data, - delta_size, &sz); - size = sz; + delta_size, &size); /* * We could not apply the delta; warn the user, but diff --git a/patch-delta.c b/patch-delta.c index b5c8594db6..44cda97994 100644 --- a/patch-delta.c +++ b/patch-delta.c @@ -12,13 +12,13 @@ #include "git-compat-util.h" #include "delta.h" -void *patch_delta(const void *src_buf, unsigned long src_size, - const void *delta_buf, unsigned long delta_size, - unsigned long *dst_size) +void *patch_delta(const void *src_buf, size_t src_size, + const void *delta_buf, size_t delta_size, + size_t *dst_size) { const unsigned char *data, *top; unsigned char *dst_buf, *out, cmd; - unsigned long size; + size_t size; if (delta_size < DELTA_SIZE_MIN) return NULL; @@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, unsigned long src_size, top = (const unsigned char *) delta_buf + delta_size; /* make sure the orig file size matches what we expect */ - size = get_delta_hdr_size(&data, top); + size = get_delta_hdr_size_sz(&data, top); if (size != src_size) return NULL; /* now the result size */ - size = get_delta_hdr_size(&data, top); + size = get_delta_hdr_size_sz(&data, top); dst_buf = xmallocz(size); out = dst_buf; diff --git a/t/helper/test-delta.c b/t/helper/test-delta.c index 52ea00c937..8223a60229 100644 --- a/t/helper/test-delta.c +++ 
b/t/helper/test-delta.c @@ -21,7 +21,7 @@ int cmd__delta(int argc, const char **argv) int fd; struct strbuf from = STRBUF_INIT, data = STRBUF_INIT; char *out_buf; - unsigned long out_size; + size_t out_size; if (argc != 5 || (strcmp(argv[1], "-d") && strcmp(argv[1], "-p"))) usage(usage_str); @@ -31,11 +31,13 @@ int cmd__delta(int argc, const char **argv) if (strbuf_read_file(&data, argv[3], 0) < 0) die_errno("unable to read '%s'", argv[3]); - if (argv[1][1] == 'd') + if (argv[1][1] == 'd') { + unsigned long delta_size; out_buf = diff_delta(from.buf, from.len, data.buf, data.len, - &out_size, 0); - else + &delta_size, 0); + out_size = delta_size; + } else out_buf = patch_delta(from.buf, from.len, data.buf, data.len, &out_size); -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 2/7] patch-delta: use size_t for sizes 2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget @ 2026-06-08 13:53 ` Patrick Steinhardt 0 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2026-06-08 13:53 UTC (permalink / raw) To: Johannes Schindelin via GitGitGadget Cc: git, Kristofer Karlsson, Johannes Schindelin On Thu, Jun 04, 2026 at 10:51:07AM +0000, Johannes Schindelin via GitGitGadget wrote: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > `patch_delta()` takes the source and delta sizes by value and writes > back the reconstructed target size through an `unsigned long *`. That > datatype cannot represent a value that exceeds 4 GiB on systems where > `unsigned long` is 32-bit (notably 64-bit Windows builds), though, even > though the delta encoding itself, the on-disk layout, and the in-memory > buffers happily carry such sizes. A `size_t` companion to > `get_delta_hdr_size()`, `get_delta_hdr_size_sz()`, was introduced in > 17fa077596 (delta, packfile: use size_t for delta header sizes, > 2026-05-08) precisely so that `patch_delta()` could be widened without > changing the on-the-wire decoding helper's signature. > > Widen `patch_delta()`'s three size parameters to `size_t` and switch > its internal use of `get_delta_hdr_size()` to the `_sz` variant. > Then propagate the wider type through the callers. Does `get_delta_hdr_size()` have any remaining callers after this patch series? I currently only spot two such callers, and you convert both of them in this patch. And can we reasonably add a test case that exercises this change? 
> diff --git a/packfile.c b/packfile.c > index 89366abfe3..e202f48837 100644 > --- a/packfile.c > +++ b/packfile.c > @@ -1964,10 +1964,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, > (uintmax_t)curpos, p->pack_name); > data = NULL; > } else { > - unsigned long sz; > data = patch_delta(base, base_size, delta_data, > - delta_size, &sz); > - size = sz; > + delta_size, &size); Nice that we get rid of this awkward construct. Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget ` (3 subsequent siblings) 6 siblings, 1 reply; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> `write_reuse_object()` learned to track its packed-object size as `size_t` in 606c192380 (odb, packfile: use size_t for streaming object sizes, 2026-05-08), but the comparison sink it feeds, `check_pack_inflate()`, still takes the expected decompressed size as `unsigned long`. The call site bridges the mismatch with `cast_size_t_to_ulong()`, which on Windows turns a >4 GiB object into an immediate die(). That function only uses `expect` once: as the right-hand side of a `stream.total_out == expect` equality test against zlib's counter. zlib's own `total_out` counter is `uLong` and is therefore still 32-bit-bound on Windows. Widening `expect` to `size_t` cannot fix that, but it is a strict improvement nonetheless: instead of dying outright, an oversized object now simply makes the equality fail and lets `write_reuse_object()` fall back to `write_no_reuse_object()`, which decompresses and re-deflates the content (and which the larger pack-objects widening series targets separately). 
Drop the `cast_size_t_to_ulong()` shim at the call site now that the receiving parameter speaks the same type as `entry_size`. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/pack-objects.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index fe9fbecb30..975f04d699 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -453,7 +453,7 @@ static int check_pack_inflate(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, - unsigned long expect) + size_t expect) { git_zstream stream; unsigned char fakebuf[4096], *in; @@ -671,8 +671,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, datalen -= entry->in_pack_header_size; if (!pack_to_stdout && p->index_version == 1 && - check_pack_inflate(p, &w_curs, offset, datalen, - cast_size_t_to_ulong(entry_size))) { + check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) { error(_("corrupt packed object for %s"), oid_to_hex(&entry->idx.oid)); unuse_pack(&w_curs); -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long 2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget @ 2026-06-08 13:53 ` Patrick Steinhardt 0 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2026-06-08 13:53 UTC (permalink / raw) To: Johannes Schindelin via GitGitGadget Cc: git, Kristofer Karlsson, Johannes Schindelin On Thu, Jun 04, 2026 at 10:51:08AM +0000, Johannes Schindelin via GitGitGadget wrote: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > `write_reuse_object()` learned to track its packed-object size as > `size_t` in 606c192380 (odb, packfile: use size_t for streaming > object sizes, 2026-05-08), but the comparison sink it feeds, > `check_pack_inflate()`, still takes the expected decompressed size > as `unsigned long`. The call site bridges the mismatch with > `cast_size_t_to_ulong()`, which on Windows turns a >4 GiB object > into an immediate die(). > > That function only uses `expect` once: as the right-hand side of a > `stream.total_out == expect` equality test against zlib's counter. > zlib's own `total_out` counter is `uLong` and is therefore still > 32-bit-bound on Windows. Widening `expect` to `size_t` cannot fix that, > but it is a strict improvement nonetheless: instead of dying outright, > an oversized object now simply makes the equality fail and lets > `write_reuse_object()` fall back to `write_no_reuse_object()`, which > decompresses and re-deflates the content (and which the larger > pack-objects widening series targets separately). Hm. I wonder whether it's possible to reset `stream.total_out` on every iteration and instead have a local `size_t` variable that we use to track the total number of inflated bytes? Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget ` (2 preceding siblings ...) 2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget ` (2 subsequent siblings) 6 siblings, 1 reply; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> The topic `js/objects-larger-than-4gb-on-windows` widened the streaming, index-pack and unpack-objects paths to `size_t` but deliberately stopped at the in-memory `unpack_entry()` cascade, which still hands back the unpacked size through `unsigned long *`. On Windows that boundary truncates above 4 GiB because that data type is only 32 bits wide on that platform. Widen the code path, except for `packed_object_info_with_index_pos()`: it cannot yet pass `oi->sizep` directly because the field is still `unsigned long *`; bridge it with a `size_t` temporary that narrows back, and let a later commit drop the bridge once the field is wide too. `gfi_unpack_entry()` keeps its narrow signature because fast-import tracks sizes through `unsigned long` everywhere it crosses subsystem boundaries; keeping that signature also keeps the scope of this commit reasonable.
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/fast-import.c | 7 ++++++- pack-check.c | 5 ++--- packfile.c | 28 +++++++++++++++++----------- packfile.h | 3 ++- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 82bc6dcc00..3dff898c43 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1239,6 +1239,8 @@ static void *gfi_unpack_entry( unsigned long *sizep) { enum object_type type; + size_t size_st = 0; + void *data; struct packed_git *p = all_packs[oe->pack_id]; if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) { /* The object is stored in the packfile we are writing to @@ -1260,7 +1262,10 @@ static void *gfi_unpack_entry( */ p->pack_size = pack_size + the_hash_algo->rawsz; } - return unpack_entry(the_repository, p, oe->idx.offset, &type, sizep); + data = unpack_entry(the_repository, p, oe->idx.offset, &type, &size_st); + if (sizep) + *sizep = cast_size_t_to_ulong(size_st); + return data; } static void load_tree(struct tree_entry *root) diff --git a/pack-check.c b/pack-check.c index 2792f34d25..5adfb3f272 100644 --- a/pack-check.c +++ b/pack-check.c @@ -143,9 +143,8 @@ static int verify_packfile(struct repository *r, data = NULL; data_valid = 0; } else { - unsigned long sz; - data = unpack_entry(r, p, entries[i].offset, &type, &sz); - size = sz; + data = unpack_entry(r, p, entries[i].offset, &type, + &size); data_valid = 1; } diff --git a/packfile.c b/packfile.c index e202f48837..dab0a9b16d 100644 --- a/packfile.c +++ b/packfile.c @@ -1454,7 +1454,7 @@ struct delta_base_cache_entry { struct delta_base_cache_key key; struct list_head lru; void *data; - unsigned long size; + size_t size; enum object_type type; }; @@ -1525,7 +1525,7 @@ static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent) } static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p, - off_t base_offset, 
unsigned long *base_size, + off_t base_offset, size_t *base_size, enum object_type *type) { struct delta_base_cache_entry *ent; @@ -1558,8 +1558,8 @@ void clear_delta_base_cache(void) } static void add_delta_base_cache(struct packed_git *p, off_t base_offset, - void *base, unsigned long base_size, - unsigned long delta_base_cache_limit, + void *base, size_t base_size, + size_t delta_base_cache_limit, enum object_type type) { struct delta_base_cache_entry *ent; @@ -1614,10 +1614,13 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off * a "real" type later if the caller is interested. */ if (oi->contentp) { - *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep, - &type); + size_t size_st = 0; + *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, + &size_st, &type); if (!*oi->contentp) type = OBJ_BAD; + else if (oi->sizep) + *oi->sizep = cast_size_t_to_ulong(size_st); } else if (oi->sizep || oi->typep || oi->delta_base_oid) { type = unpack_object_header(p, &w_curs, &curpos, &size); } @@ -1735,7 +1738,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset, static void *unpack_compressed_entry(struct packed_git *p, struct pack_window **w_curs, off_t curpos, - unsigned long size) + size_t size) { int st; git_zstream stream; @@ -1790,11 +1793,11 @@ int do_check_packed_object_crc; struct unpack_entry_stack_ent { off_t obj_offset; off_t curpos; - unsigned long size; + size_t size; }; void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, - enum object_type *final_type, unsigned long *final_size) + enum object_type *final_type, size_t *final_size) { struct pack_window *w_curs = NULL; off_t curpos = obj_offset; @@ -1911,7 +1914,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, void *delta_data; void *base = data; void *external_base = NULL; - unsigned long delta_size, base_size = size; + size_t delta_size, base_size = size; int i; off_t 
base_obj_offset = obj_offset; @@ -1928,6 +1931,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, struct object_id base_oid; if (!(offset_to_pack_pos(p, obj_offset, &pos))) { struct object_info oi = OBJECT_INFO_INIT; + unsigned long bsz_ul = 0; nth_packed_object_id(&base_oid, p, pack_pos_to_index(p, pos)); @@ -1938,11 +1942,13 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, mark_bad_packed_object(p, &base_oid); oi.typep = &type; - oi.sizep = &base_size; + oi.sizep = &bsz_ul; oi.contentp = &base; if (odb_read_object_info_extended(r->objects, &base_oid, &oi, 0) < 0) base = NULL; + else + base_size = bsz_ul; external_base = base; } diff --git a/packfile.h b/packfile.h index 49d6bdecf6..0b5ae3f9fc 100644 --- a/packfile.h +++ b/packfile.h @@ -455,7 +455,8 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n); off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *); int is_pack_valid(struct packed_git *); -void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *); +void *unpack_entry(struct repository *r, struct packed_git *, off_t, + enum object_type *, size_t *); unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep); unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *); -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t 2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget @ 2026-06-08 13:53 ` Patrick Steinhardt 0 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2026-06-08 13:53 UTC (permalink / raw) To: Johannes Schindelin via GitGitGadget Cc: git, Kristofer Karlsson, Johannes Schindelin On Thu, Jun 04, 2026 at 10:51:09AM +0000, Johannes Schindelin via GitGitGadget wrote: > diff --git a/builtin/fast-import.c b/builtin/fast-import.c > index 82bc6dcc00..3dff898c43 100644 > --- a/builtin/fast-import.c > +++ b/builtin/fast-import.c > @@ -1239,6 +1239,8 @@ static void *gfi_unpack_entry( > unsigned long *sizep) > { > enum object_type type; > + size_t size_st = 0; > + void *data; > struct packed_git *p = all_packs[oe->pack_id]; > if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) { > /* The object is stored in the packfile we are writing to > @@ -1260,7 +1262,10 @@ static void *gfi_unpack_entry( > */ > p->pack_size = pack_size + the_hash_algo->rawsz; > } > - return unpack_entry(the_repository, p, oe->idx.offset, &type, sizep); > + data = unpack_entry(the_repository, p, oe->idx.offset, &type, &size_st); > + if (sizep) > + *sizep = cast_size_t_to_ulong(size_st); > + return data; > } Nit, please feel free to ignore: do we want to add a NEEDSWORK comment here? Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 5/7] pack-objects: use size_t for in-core object sizes 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget ` (3 preceding siblings ...) 2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget 6 siblings, 0 replies; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> `pack-objects` stores per-entry object sizes in either the 31-bit `size_` member of the `struct object_entry` or, when the value does not fit, the `pack->delta_size[]` spill array. The accessors (`oe_size`, `oe_delta_size`, `oe_get_size_slow`, `oe_size_*_than`) and the setters (`oe_set_size`, `oe_set_delta_size`) used `unsigned long` for the spill type, which on Windows means the spill silently caps at 4 GiB per entry. That is what made `upload-pack` die with "object too large to read on this platform" when serving the >4 GiB blob in `t5608` tests 5 and 6 when run with `GIT_TEST_CLONE_2GB`. Widen them all to `size_t` (including `pack->delta_size`) and drop the three `cast_size_t_to_ulong()` calls in `check_object()` that guarded `in_pack_size`. The two `SET_SIZE(entry, canonical_size)` calls in the same function stay cast-free as before, since `canonical_size` is still `unsigned long` until a later commit widens `object_info::sizep`. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/pack-objects.c | 35 ++++++++++++++++++----------------- pack-objects.h | 2 +- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 975f04d699..bb372d0b03 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -66,8 +66,8 @@ static inline struct object_entry *oe_delta( return &pack->objects[e->delta_idx - 1]; } -static inline unsigned long oe_delta_size(struct packing_data *pack, - const struct object_entry *e) +static inline size_t oe_delta_size(struct packing_data *pack, + const struct object_entry *e) { if (e->delta_size_valid) return e->delta_size_; @@ -83,11 +83,11 @@ static inline unsigned long oe_delta_size(struct packing_data *pack, return pack->delta_size[e - pack->objects]; } -unsigned long oe_get_size_slow(struct packing_data *pack, - const struct object_entry *e); +size_t oe_get_size_slow(struct packing_data *pack, + const struct object_entry *e); -static inline unsigned long oe_size(struct packing_data *pack, - const struct object_entry *e) +static inline size_t oe_size(struct packing_data *pack, + const struct object_entry *e) { if (e->size_valid) return e->size_; @@ -145,7 +145,7 @@ static inline void oe_set_delta_sibling(struct packing_data *pack, static inline void oe_set_size(struct packing_data *pack, struct object_entry *e, - unsigned long size) + size_t size) { if (size < pack->oe_size_limit) { e->size_ = size; @@ -159,7 +159,7 @@ static inline void oe_set_size(struct packing_data *pack, static inline void oe_set_delta_size(struct packing_data *pack, struct object_entry *e, - unsigned long size) + size_t size) { if (size < pack->oe_delta_size_limit) { e->delta_size_ = size; @@ -496,7 +496,7 @@ static void copy_pack_data(struct hashfile *f, static inline int oe_size_greater_than(struct packing_data *pack, const struct object_entry *lhs, - unsigned long rhs) + size_t rhs) { 
if (lhs->size_valid) return lhs->size_ > rhs; @@ -2277,7 +2277,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index) default: /* Not a delta hence we've already got all we need. */ oe_set_type(entry, entry->in_pack_type); - SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size)); + SET_SIZE(entry, in_pack_size); entry->in_pack_header_size = used; if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB) goto give_up; @@ -2331,8 +2331,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index) if (have_base && can_reuse_delta(&base_ref, entry, &base_entry)) { oe_set_type(entry, entry->in_pack_type); - SET_SIZE(entry, cast_size_t_to_ulong(in_pack_size)); /* delta size */ - SET_DELTA_SIZE(entry, cast_size_t_to_ulong(in_pack_size)); + SET_SIZE(entry, in_pack_size); /* delta size */ + SET_DELTA_SIZE(entry, in_pack_size); if (base_entry) { SET_DELTA(entry, base_entry); @@ -2355,7 +2355,8 @@ static void check_object(struct object_entry *entry, uint32_t object_index) * object size from the delta header. */ delta_pos = entry->in_pack_offset + entry->in_pack_header_size; - canonical_size = get_size_from_delta(p, &w_curs, delta_pos); + canonical_size = get_size_from_delta(p, &w_curs, + delta_pos); if (canonical_size == 0) goto give_up; SET_SIZE(entry, canonical_size); @@ -2711,7 +2712,7 @@ static pthread_mutex_t progress_mutex; static inline int oe_size_less_than(struct packing_data *pack, const struct object_entry *lhs, - unsigned long rhs) + size_t rhs) { if (lhs->size_valid) return lhs->size_ < rhs; @@ -2734,8 +2735,8 @@ static inline void oe_set_tree_depth(struct packing_data *pack, * reconstruction (so non-deltas are true object sizes, but deltas * return the size of the delta data). 
*/ -unsigned long oe_get_size_slow(struct packing_data *pack, - const struct object_entry *e) +size_t oe_get_size_slow(struct packing_data *pack, + const struct object_entry *e) { struct packed_git *p; struct pack_window *w_curs; @@ -2769,7 +2770,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack, unuse_pack(&w_curs); packing_data_unlock(&to_pack); - return cast_size_t_to_ulong(size); + return size; } static int try_delta(struct unpacked *trg, struct unpacked *src, diff --git a/pack-objects.h b/pack-objects.h index 83299d4732..e97e84ddcb 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -141,7 +141,7 @@ struct packing_data { uint32_t index_size; unsigned int *in_pack_pos; - unsigned long *delta_size; + size_t *delta_size; /* * Only one of these can be non-NULL and they have different -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget ` (4 preceding siblings ...) 2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget 6 siblings, 1 reply; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> When I started the transition from `unsigned long` to `size_t`, in the interest of keeping the patches reviewable, I introduced these calls to prevent data type narrowing from silently failing to handle large object sizes. I also introduced `*_sz()` variants that would allow most of the callers to keep using that `unsigned long` that the 90s kindly asked to be returned. After the preceding commits, the only places that called the narrow wrappers either no longer exist or already use the `_sz` form internally, so the wrappers just narrow values back through `cast_size_t_to_ulong()` for no reason. Drop them and rename the `_sz` variants back to the natural names. 
Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- delta.h | 14 ++------------ packfile.c | 28 ++++++++-------------------- packfile.h | 2 +- patch-delta.c | 4 ++-- 4 files changed, 13 insertions(+), 35 deletions(-) diff --git a/delta.h b/delta.h index bb149dc82b..eb5c6d2fdb 100644 --- a/delta.h +++ b/delta.h @@ -86,11 +86,8 @@ void *patch_delta(const void *src_buf, size_t src_size, * This must be called twice on the delta data buffer, first to get the * expected source buffer size, and again to get the target buffer size. */ -/* - * Size_t variant that doesn't truncate - use for >4GB objects on Windows. - */ -static inline size_t get_delta_hdr_size_sz(const unsigned char **datap, - const unsigned char *top) +static inline size_t get_delta_hdr_size(const unsigned char **datap, + const unsigned char *top) { const unsigned char *data = *datap; size_t cmd, size = 0; @@ -104,11 +101,4 @@ static inline size_t get_delta_hdr_size_sz(const unsigned char **datap, return size; } -static inline unsigned long get_delta_hdr_size(const unsigned char **datap, - const unsigned char *top) -{ - size_t size = get_delta_hdr_size_sz(datap, top); - return cast_size_t_to_ulong(size); -} - #endif diff --git a/packfile.c b/packfile.c index dab0a9b16d..c174982d10 100644 --- a/packfile.c +++ b/packfile.c @@ -1164,11 +1164,12 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, } /* - * Size_t variant for >4GB delta results on Windows. + * Read a delta object's header at curpos in p (already inflated as needed) + * and return the size of the result object (the post-application target). 
*/ -static size_t get_size_from_delta_sz(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos) +size_t get_size_from_delta(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos) { const unsigned char *data; unsigned char delta_head[20], *in; @@ -1215,18 +1216,10 @@ static size_t get_size_from_delta_sz(struct packed_git *p, data = delta_head; /* ignore base size */ - get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head)); + get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); /* Read the result size */ - return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head)); -} - -unsigned long get_size_from_delta(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos) -{ - size_t size = get_size_from_delta_sz(p, w_curs, curpos); - return cast_size_t_to_ulong(size); + return get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); } int unpack_object_header(struct packed_git *p, @@ -1634,12 +1627,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off ret = -1; goto out; } - /* - * Use size_t variant to avoid die() on >4GB deltas. - * oi->sizep is unsigned long, so truncation may occur, - * but streaming code uses its own size_t tracking. 
- */ - size = get_size_from_delta_sz(p, &w_curs, tmp_pos); + size = get_size_from_delta(p, &w_curs, tmp_pos); if (size == 0) { ret = -1; goto out; diff --git a/packfile.h b/packfile.h index 0b5ae3f9fc..bd4494906d 100644 --- a/packfile.h +++ b/packfile.h @@ -458,7 +458,7 @@ int is_pack_valid(struct packed_git *); void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, size_t *); unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep); -unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); +size_t get_size_from_delta(struct packed_git *, struct pack_window **, off_t); int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *); off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs, off_t *curpos, enum object_type type, diff --git a/patch-delta.c b/patch-delta.c index 44cda97994..42199fa956 100644 --- a/patch-delta.c +++ b/patch-delta.c @@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, size_t src_size, top = (const unsigned char *) delta_buf + delta_size; /* make sure the orig file size matches what we expect */ - size = get_delta_hdr_size_sz(&data, top); + size = get_delta_hdr_size(&data, top); if (size != src_size) return NULL; /* now the result size */ - size = get_delta_hdr_size_sz(&data, top); + size = get_delta_hdr_size(&data, top); dst_buf = xmallocz(size); out = dst_buf; -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers 2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget @ 2026-06-08 13:53 ` Patrick Steinhardt 0 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2026-06-08 13:53 UTC (permalink / raw) To: Johannes Schindelin via GitGitGadget Cc: git, Kristofer Karlsson, Johannes Schindelin On Thu, Jun 04, 2026 at 10:51:11AM +0000, Johannes Schindelin via GitGitGadget wrote: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > When I started the transition from `unsigned long` to `size_t`, in the > interest of keeping the patches reviewable, I introduced these calls to > prevent data type narrowing from silently failing to handle large object > sizes. I also introduced `*_sz()` variants that would allow most of the > callers to keep using that `unsigned long` that the 90s kindly asked to > be returned. > > After the preceding commits, the only places that called the narrow > wrappers either no longer exist or already use the `_sz` form > internally, so the wrappers just narrow values back through > `cast_size_t_to_ulong()` for no reason. > > Drop them and rename the `_sz` variants back to the natural names. Aha, so you already addressed the comment I had on one of the preceding patches :) Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget ` (5 preceding siblings ...) 2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 6 siblings, 1 reply; 13+ messages in thread From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw) To: git; +Cc: Kristofer Karlsson, Johannes Schindelin, Johannes Schindelin From: Johannes Schindelin <johannes.schindelin@gmx.de> When `js/objects-larger-than-4gb-on-windows` widened the streaming, index-pack and unpack-objects code paths, in the interest of keeping the patches somewhat reasonably-sized, it left the public ODB API still typed in `unsigned long`. In particular `struct object_info::sizep` and the four wrappers built on top of it (`odb_read_object`, `odb_read_object_peeled`, `odb_read_object_info`, `odb_pretend_object`) still return the unpacked size through `unsigned long *`, so on Windows `cat-file -s` and the `git add` / `git status` paths for a >4 GiB blob silently cap at 4 GiB. Widen the field and the four wrappers. The previous commits already widened the `unpack_entry()` cascade and pack-objects' in-core size accessors, so most of the cascade arrives here with no further work: the temporary shims in `packed_object_info_with_index_pos()` and in `unpack_entry()`'s delta-base recovery path go away, the two `SET_SIZE(entry, cast_size_t_to_ulong(canonical_size))` calls in `check_object()` and the matching one in `drop_reused_delta()` collapse to plain `SET_SIZE`, and `oe_get_size_slow()`'s tail `cast_size_t_to_ulong()` is gone too. What remains narrow are the boundaries this series does not intend to touch: the diff, blame, textconv and fast-import machinery. 
Even so, this patch is unfortunately quite large. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- apply.c | 6 +++--- archive.c | 4 ++-- attr.c | 2 +- bisect.c | 2 +- blame.c | 15 ++++++++++---- builtin/cat-file.c | 39 +++++++++++++++++++++-------------- builtin/difftool.c | 2 +- builtin/fast-export.c | 7 +++++-- builtin/fast-import.c | 22 ++++++++++++++------ builtin/fsck.c | 2 +- builtin/grep.c | 12 +++++------ builtin/index-pack.c | 6 +++--- builtin/log.c | 2 +- builtin/ls-files.c | 2 +- builtin/ls-tree.c | 4 ++-- builtin/merge-tree.c | 6 +++--- builtin/mktag.c | 2 +- builtin/notes.c | 6 +++--- builtin/pack-objects.c | 33 ++++++++++++++++++++--------- builtin/repo.c | 4 +++- builtin/tag.c | 4 ++-- builtin/unpack-file.c | 2 +- builtin/unpack-objects.c | 6 ++++-- bundle.c | 2 +- combine-diff.c | 4 +++- commit.c | 10 ++++----- config.c | 2 +- diff.c | 5 ++++- dir.c | 2 +- entry.c | 4 +--- fmt-merge-msg.c | 4 ++-- fsck.c | 2 +- grep.c | 4 +++- http-push.c | 2 +- list-objects-filter.c | 2 +- mailmap.c | 2 +- match-trees.c | 4 ++-- merge-blobs.c | 6 +++--- merge-blobs.h | 2 +- merge-ort.c | 2 +- notes-cache.c | 2 +- notes-merge.c | 2 +- notes.c | 8 ++++--- object-file.c | 18 +++++----------- object.c | 2 +- odb.c | 12 +++++------ odb.h | 10 ++++----- odb/streaming.c | 13 +----------- pack-bitmap.c | 4 ++-- packfile.c | 12 +++-------- path-walk.c | 2 +- protocol-caps.c | 5 +++-- read-cache.c | 6 +++--- ref-filter.c | 2 +- reflog.c | 2 +- rerere.c | 2 +- submodule-config.c | 2 +- t/helper/test-pack-deltas.c | 3 ++- t/helper/test-partial-clone.c | 2 +- t/unit-tests/u-odb-inmemory.c | 2 +- tag.c | 4 ++-- tree-walk.c | 10 +++++---- tree.c | 2 +- xdiff-interface.c | 2 +- 64 files changed, 205 insertions(+), 173 deletions(-) diff --git a/apply.c b/apply.c index 3cf544e9a9..5e54453f79 100644 --- a/apply.c +++ b/apply.c @@ -3321,7 +3321,7 @@ static int apply_binary(struct apply_state *state, if (odb_has_object(the_repository->objects, 
&oid, 0)) { /* We already have the postimage */ enum object_type type; - unsigned long size; + size_t size; char *result; result = odb_read_object(the_repository->objects, &oid, @@ -3384,7 +3384,7 @@ static int read_blob_object(struct strbuf *buf, const struct object_id *oid, uns strbuf_addf(buf, "Subproject commit %s\n", oid_to_hex(oid)); } else { enum object_type type; - unsigned long sz; + size_t sz; char *result; result = odb_read_object(the_repository->objects, oid, @@ -3611,7 +3611,7 @@ static int load_preimage(struct apply_state *state, static int resolve_to(struct image *image, const struct object_id *result_id) { - unsigned long size; + size_t size; enum object_type type; char *data; diff --git a/archive.c b/archive.c index 51229107a5..59790be986 100644 --- a/archive.c +++ b/archive.c @@ -87,7 +87,7 @@ static void *object_file_to_archive(const struct archiver_args *args, const struct object_id *oid, unsigned int mode, enum object_type *type, - unsigned long *sizep) + size_t *sizep) { void *buffer; const struct commit *commit = args->convert ? 
args->commit : NULL; @@ -158,7 +158,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base, write_archive_entry_fn_t write_entry = c->write_entry; int err; const char *path_without_prefix; - unsigned long size; + size_t size; void *buffer; enum object_type type; diff --git a/attr.c b/attr.c index 75369547b3..c61472a4e6 100644 --- a/attr.c +++ b/attr.c @@ -768,7 +768,7 @@ static struct attr_stack *read_attr_from_blob(struct index_state *istate, const char *path, unsigned flags) { struct object_id oid; - unsigned long sz; + size_t sz; enum object_type type; void *buf; unsigned short mode; diff --git a/bisect.c b/bisect.c index 905a9afb05..4742a5fef4 100644 --- a/bisect.c +++ b/bisect.c @@ -154,7 +154,7 @@ static void show_list(const char *debug, int counted, int nr, struct commit *commit = p->item; unsigned commit_flags = commit->object.flags; enum object_type type; - unsigned long size; + size_t size; char *buf = odb_read_object(the_repository->objects, &commit->object.oid, &type, &size); diff --git a/blame.c b/blame.c index 977cbb7097..126e232416 100644 --- a/blame.c +++ b/blame.c @@ -1041,10 +1041,13 @@ static void fill_origin_blob(struct diff_options *opt, textconv_object(opt->repo, o->path, o->mode, &o->blob_oid, 1, &file->ptr, &file_size)) ; - else + else { + size_t file_size_st = 0; file->ptr = odb_read_object(the_repository->objects, &o->blob_oid, &type, - &file_size); + &file_size_st); + file_size = cast_size_t_to_ulong(file_size_st); + } file->size = file_size; if (!file->ptr) @@ -2869,10 +2872,14 @@ void setup_scoreboard(struct blame_scoreboard *sb, textconv_object(sb->repo, sb->path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf, &sb->final_buf_size)) ; - else + else { + size_t final_buf_size_st = 0; sb->final_buf = odb_read_object(the_repository->objects, &o->blob_oid, &type, - &sb->final_buf_size); + &final_buf_size_st); + sb->final_buf_size = + cast_size_t_to_ulong(final_buf_size_st); + } if (!sb->final_buf) 
die(_("cannot read blob %s for path %s"), diff --git a/builtin/cat-file.c b/builtin/cat-file.c index fa45f774d7..fa6e396ddc 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -84,7 +84,7 @@ static char *replace_idents_using_mailmap(char *object_buf, size_t *size) static int filter_object(const char *path, unsigned mode, const struct object_id *oid, - char **buf, unsigned long *size) + char **buf, size_t *size) { enum object_type type; @@ -120,7 +120,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) struct object_id oid; enum object_type type; char *buf; - unsigned long size; + size_t size; struct object_context obj_context = {0}; struct object_info oi = OBJECT_INFO_INIT; unsigned flags = OBJECT_INFO_LOOKUP_REPLACE; @@ -166,7 +166,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) { size_t s = size; buf = replace_idents_using_mailmap(buf, &s); - size = cast_size_t_to_ulong(s); + size = s; } printf("%"PRIuMAX"\n", (uintmax_t)size); @@ -188,9 +188,15 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) break; case 'c': - if (textconv_object(the_repository, path, obj_context.mode, - &oid, 1, &buf, &size)) + { + unsigned long size_ul = 0; + int textconv_ret = textconv_object(the_repository, path, + obj_context.mode, &oid, 1, + &buf, &size_ul); + size = size_ul; + if (textconv_ret) break; + } /* else fallthrough */ case 'p': @@ -219,7 +225,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) if (use_mailmap) { size_t s = size; buf = replace_idents_using_mailmap(buf, &s); - size = cast_size_t_to_ulong(s); + size = s; } /* otherwise just spit out the data */ @@ -266,7 +272,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) if (use_mailmap) { size_t s = size; buf = replace_idents_using_mailmap(buf, &s); - size = cast_size_t_to_ulong(s); + size = s; } break; } 
@@ -288,7 +294,7 @@ cleanup: struct expand_data { struct object_id oid; enum object_type type; - unsigned long size; + size_t size; unsigned short mode; off_t disk_size; const char *rest; @@ -405,7 +411,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d fflush(stdout); if (opt->transform_mode) { char *contents; - unsigned long size; + size_t size; if (!data->rest) die("missing path for '%s'", oid_to_hex(oid)); @@ -417,9 +423,12 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d oid_to_hex(oid), data->rest); } else if (opt->transform_mode == 'c') { enum object_type type; - if (!textconv_object(the_repository, - data->rest, 0100644, oid, - 1, &contents, &size)) + unsigned long size_ul = 0; + if (textconv_object(the_repository, + data->rest, 0100644, oid, + 1, &contents, &size_ul)) + size = size_ul; + else contents = odb_read_object(the_repository->objects, oid, &type, &size); if (!contents) @@ -435,7 +444,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d } else { enum object_type type; - unsigned long size; + size_t size; void *contents; contents = odb_read_object(the_repository->objects, oid, @@ -446,7 +455,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d if (use_mailmap) { size_t s = size; contents = replace_idents_using_mailmap(contents, &s); - size = cast_size_t_to_ulong(s); + size = s; } if (type != data->type) @@ -555,7 +564,7 @@ static void batch_object_write(const char *obj_name, if (!buf) die(_("unable to read %s"), oid_to_hex(&data->oid)); buf = replace_idents_using_mailmap(buf, &s); - data->size = cast_size_t_to_ulong(s); + data->size = s; free(buf); } diff --git a/builtin/difftool.c b/builtin/difftool.c index 2a21005f2e..26778f8515 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -319,7 +319,7 @@ static char *get_symlink(struct repository *repo, data = strbuf_detach(&link, NULL); } else { enum object_type 
type; - unsigned long size; + size_t size; data = odb_read_object(repo->objects, oid, &type, &size); if (!data) die(_("could not read object %s for symlink %s"), diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 2eb43a28da..0be43104dc 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -317,7 +317,10 @@ static void export_blob(const struct object_id *oid) object = (struct object *)lookup_blob(the_repository, oid); eaten = 0; } else { - buf = odb_read_object(the_repository->objects, oid, &type, &size); + size_t size_st = 0; + buf = odb_read_object(the_repository->objects, oid, &type, + &size_st); + size = cast_size_t_to_ulong(size_st); if (!buf) die(_("could not read blob %s"), oid_to_hex(oid)); if (check_object_signature(the_repository, oid, buf, size, @@ -880,7 +883,7 @@ static char *anonymize_tag(void) static void handle_tag(const char *name, struct tag *tag) { - unsigned long size; + size_t size; enum object_type type; char *buf; const char *tagger, *tagger_end, *message; diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 3dff898c43..d11a2cc2c1 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1291,7 +1291,10 @@ static void load_tree(struct tree_entry *root) die(_("can't load tree %s"), oid_to_hex(oid)); } else { enum object_type type; - buf = odb_read_object(the_repository->objects, oid, &type, &size); + size_t size_st = 0; + buf = odb_read_object(the_repository->objects, oid, &type, + &size_st); + size = cast_size_t_to_ulong(size_st); if (!buf || type != OBJ_TREE) die(_("can't load tree %s"), oid_to_hex(oid)); } @@ -2560,7 +2563,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa die(_("mark :%" PRIuMAX " not a commit"), commit_mark); oidcpy(&commit_oid, &commit_oe->idx.oid); } else if (!repo_get_oid(the_repository, p, &commit_oid)) { - unsigned long size; + size_t size; char *buf = odb_read_object_peeled(the_repository->objects, &commit_oid, OBJ_COMMIT, &size, 
&commit_oid); @@ -2627,10 +2630,12 @@ static void parse_from_existing(struct branch *b) oidclr(&b->branch_tree.versions[1].oid, the_repository->hash_algo); } else { unsigned long size; + size_t size_st = 0; char *buf; buf = odb_read_object_peeled(the_repository->objects, &b->oid, - OBJ_COMMIT, &size, &b->oid); + OBJ_COMMIT, &size_st, &b->oid); + size = cast_size_t_to_ulong(size_st); parse_from_commit(b, buf, size); free(buf); } @@ -2722,7 +2727,7 @@ static struct hash_list *parse_merge(unsigned int *count) die(_("mark :%" PRIuMAX " not a commit"), idnum); oidcpy(&n->oid, &oe->idx.oid); } else if (!repo_get_oid(the_repository, from, &n->oid)) { - unsigned long size; + size_t size; char *buf = odb_read_object_peeled(the_repository->objects, &n->oid, OBJ_COMMIT, &size, &n->oid); @@ -3330,7 +3335,10 @@ static void cat_blob(struct object_entry *oe, struct object_id *oid) char *buf; if (!oe || oe->pack_id == MAX_PACK_ID) { - buf = odb_read_object(the_repository->objects, oid, &type, &size); + size_t size_st = 0; + buf = odb_read_object(the_repository->objects, oid, &type, + &size_st); + size = cast_size_t_to_ulong(size_st); } else { type = oe->type; buf = gfi_unpack_entry(oe, &size); @@ -3438,8 +3446,10 @@ static struct object_entry *dereference(struct object_entry *oe, buf = gfi_unpack_entry(oe, &size); } else { enum object_type unused; + size_t size_st = 0; buf = odb_read_object(the_repository->objects, oid, - &unused, &size); + &unused, &size_st); + size = cast_size_t_to_ulong(size_st); } if (!buf) die(_("can't load object %s"), oid_to_hex(oid)); diff --git a/builtin/fsck.c b/builtin/fsck.c index 248f8ff5a0..76b723f36d 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -724,7 +724,7 @@ static int fsck_loose(const struct object_id *oid, const char *path, struct for_each_loose_cb *data = cb_data; struct object *obj; enum object_type type = OBJ_NONE; - unsigned long size; + size_t size; void *contents = NULL; int eaten; struct object_info oi = OBJECT_INFO_INIT; diff 
--git a/builtin/grep.c b/builtin/grep.c index 6a09571903..26b85479ca 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -520,7 +520,7 @@ static int grep_submodule(struct grep_opt *opt, enum object_type object_type; struct tree_desc tree; void *data; - unsigned long size; + size_t size; struct strbuf base = STRBUF_INIT; obj_read_lock(); @@ -573,7 +573,7 @@ static int grep_cache(struct grep_opt *opt, enum object_type type; struct tree_desc tree; void *data; - unsigned long size; + size_t size; data = odb_read_object(the_repository->objects, &ce->oid, &type, &size); @@ -666,7 +666,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, enum object_type type; struct tree_desc sub; void *data; - unsigned long size; + size_t size; data = odb_read_object(the_repository->objects, &entry.oid, &type, &size); @@ -730,7 +730,7 @@ static void collect_blob_oids_for_tree(struct repository *repo, enum object_type type; struct tree_desc sub_tree; void *data; - unsigned long size; + size_t size; data = odb_read_object(repo->objects, &entry.oid, &type, &size); @@ -764,7 +764,7 @@ static void collect_blob_oids_for_treeish(struct grep_opt *opt, { struct tree_desc tree; void *data; - unsigned long size; + size_t size; struct strbuf base = STRBUF_INIT; int len; @@ -841,7 +841,7 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) { struct tree_desc tree; void *data; - unsigned long size; + size_t size; struct strbuf base; int hit, len; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 3c4474e681..78da3a6566 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -258,7 +258,7 @@ static unsigned check_object(struct object *obj) return 0; if (!(obj->flags & FLAG_CHECKED)) { - unsigned long size; + size_t size; int type = odb_read_object_info(the_repository->objects, &obj->oid, &size); if (type <= 0) @@ -905,7 +905,7 @@ static void sha1_object(const void *data, 
struct object_entry *obj_entry, if (collision_test_needed) { void *has_data; enum object_type has_type; - unsigned long has_size; + size_t has_size; read_lock(); has_type = odb_read_object_info(the_repository->objects, oid, &has_size); if (has_type < 0) @@ -1515,7 +1515,7 @@ static void fix_unresolved_deltas(struct hashfile *f) struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; void *data; - unsigned long size; + size_t size; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; diff --git a/builtin/log.c b/builtin/log.c index e464b30af4..d027ce1e0b 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -613,7 +613,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c static int show_tag_object(const struct object_id *oid, struct rev_info *rev) { - unsigned long size; + size_t size; enum object_type type; char *buf = odb_read_object(the_repository->objects, oid, &type, &size); unsigned long offset = 0; diff --git a/builtin/ls-files.c b/builtin/ls-files.c index e1a22b41b9..bfbd145e97 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -251,7 +251,7 @@ static void expand_objectsize(struct repository *repo, struct strbuf *line, const enum object_type type, unsigned int padded) { if (type == OBJ_BLOB) { - unsigned long size; + size_t size; if (odb_read_object_info(repo->objects, oid, &size) < 0) die(_("could not get object info about '%s'"), oid_to_hex(oid)); diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index 113e4a960d..7d075bfca2 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -27,7 +27,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid, const enum object_type type, unsigned int padded) { if (type == OBJ_BLOB) { - unsigned long size; + size_t size; if (odb_read_object_info(the_repository->objects, oid, &size) < 0) die(_("could not get object info about '%s'"), oid_to_hex(oid)); @@ -217,7 +217,7 @@ static int show_tree_long(const struct object_id *oid, struct 
strbuf *base, return early; if (type == OBJ_BLOB) { - unsigned long size; + size_t size; if (odb_read_object_info(the_repository->objects, oid, &size) == OBJ_BAD) xsnprintf(size_text, sizeof(size_text), "BAD"); else diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 312b595d1e..49f41e520f 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -69,7 +69,7 @@ static const char *explanation(struct merge_list *entry) return "removed in remote"; } -static void *result(struct merge_list *entry, unsigned long *size) +static void *result(struct merge_list *entry, size_t *size) { enum object_type type; struct blob *base, *our, *their; @@ -96,7 +96,7 @@ static void *result(struct merge_list *entry, unsigned long *size) base, our, their, size); } -static void *origin(struct merge_list *entry, unsigned long *size) +static void *origin(struct merge_list *entry, size_t *size) { enum object_type type; while (entry) { @@ -119,7 +119,7 @@ static int show_outf(void *priv UNUSED, mmbuffer_t *mb, int nbuf) static void show_diff(struct merge_list *entry) { - unsigned long size; + size_t size; mmfile_t src, dst; xpparam_t xpp; xdemitconf_t xecfg; diff --git a/builtin/mktag.c b/builtin/mktag.c index f40264a878..37c17e6beb 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -50,7 +50,7 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type) { int ret; enum object_type type; - unsigned long size; + size_t size; void *buffer; const struct object_id *repl; diff --git a/builtin/notes.c b/builtin/notes.c index 9af602bdd7..962df867c8 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -150,7 +150,7 @@ static int list_each_note(const struct object_id *object_oid, static void copy_obj_to_fd(int fd, const struct object_id *oid) { - unsigned long size; + size_t size; enum object_type type; char *buf = odb_read_object(the_repository->objects, oid, &type, &size); if (buf) { @@ -313,7 +313,7 @@ static int parse_reuse_arg(const struct option *opt, 
const char *arg, int unset) char *value; struct object_id object; enum object_type type; - unsigned long len; + size_t len; BUG_ON_OPT_NEG(unset); @@ -721,7 +721,7 @@ static int append_edit(int argc, const char **argv, const char *prefix, if (note && !edit) { /* Append buf to previous note contents */ - unsigned long size; + size_t size; enum object_type type; struct strbuf buf = STRBUF_INIT; char *prev_buf = odb_read_object(the_repository->objects, note, &type, &size); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index bb372d0b03..6202fe4dca 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -356,14 +356,17 @@ static void *get_delta(struct object_entry *entry) unsigned long size, base_size, delta_size; void *buf, *base_buf, *delta_buf; enum object_type type; + size_t size_st = 0, base_size_st = 0; buf = odb_read_object(the_repository->objects, &entry->idx.oid, - &type, &size); + &type, &size_st); + size = cast_size_t_to_ulong(size_st); if (!buf) die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); base_buf = odb_read_object(the_repository->objects, &DELTA(entry)->idx.oid, &type, - &base_size); + &base_size_st); + base_size = cast_size_t_to_ulong(base_size_st); if (!base_buf) die("unable to read %s", oid_to_hex(&DELTA(entry)->idx.oid)); @@ -528,9 +531,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent type = st->type; size = st->size; } else { + size_t size_st = 0; buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, - &size); + &size_st); + size = cast_size_t_to_ulong(size_st); if (!buf) die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); @@ -1935,6 +1940,7 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid) struct pbase_tree_cache *ent, *nent; void *data; unsigned long size; + size_t size_st = 0; enum object_type type; int neigh; int my_ix = pbase_tree_cache_ix(oid); @@ -1962,7 +1968,8 @@ static struct pbase_tree_cache *pbase_tree_get(const 
struct object_id *oid) /* Did not find one. Either we got a bogus request or * we need to read and perhaps cache. */ - data = odb_read_object(the_repository->objects, oid, &type, &size); + data = odb_read_object(the_repository->objects, oid, &type, &size_st); + size = cast_size_t_to_ulong(size_st); if (!data) return NULL; if (type != OBJ_TREE) { @@ -2117,13 +2124,15 @@ static void add_preferred_base(struct object_id *oid) struct pbase_tree *it; void *data; unsigned long size; + size_t size_st = 0; struct object_id tree_oid; if (window <= num_preferred_base++) return; data = odb_read_object_peeled(the_repository->objects, oid, - OBJ_TREE, &size, &tree_oid); + OBJ_TREE, &size_st, &tree_oid); + size = cast_size_t_to_ulong(size_st); if (!data) return; @@ -2235,7 +2244,7 @@ static void prefetch_to_pack(uint32_t object_index_start) { static void check_object(struct object_entry *entry, uint32_t object_index) { - unsigned long canonical_size; + size_t canonical_size; enum object_type type; struct object_info oi = {.typep = &type, .sizep = &canonical_size}; @@ -2434,7 +2443,7 @@ static void drop_reused_delta(struct object_entry *entry) unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx; struct object_info oi = OBJECT_INFO_INIT; enum object_type type; - unsigned long size; + size_t size; while (*idx) { struct object_entry *oe = &to_pack.objects[*idx - 1]; @@ -2746,7 +2755,7 @@ size_t oe_get_size_slow(struct packing_data *pack, size_t size; if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) { - unsigned long sz; + size_t sz; packing_data_lock(&to_pack); if (odb_read_object_info(the_repository->objects, &e->idx.oid, &sz) < 0) @@ -2831,10 +2840,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, /* Load data if not already done */ if (!trg->data) { + size_t sz_st = 0; packing_data_lock(&to_pack); trg->data = odb_read_object(the_repository->objects, &trg_entry->idx.oid, &type, - &sz); + &sz_st); + sz = cast_size_t_to_ulong(sz_st); 
packing_data_unlock(&to_pack); if (!trg->data) die(_("object %s cannot be read"), @@ -2846,10 +2857,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, *mem_usage += sz; } if (!src->data) { + size_t sz_st = 0; packing_data_lock(&to_pack); src->data = odb_read_object(the_repository->objects, &src_entry->idx.oid, &type, - &sz); + &sz_st); + sz = cast_size_t_to_ulong(sz_st); packing_data_unlock(&to_pack); if (!src->data) { if (src_entry->preferred_base) { diff --git a/builtin/repo.c b/builtin/repo.c index 71a5c1c29c..69f3626467 100644 --- a/builtin/repo.c +++ b/builtin/repo.c @@ -784,13 +784,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, for (size_t i = 0; i < oids->nr; i++) { struct object_info oi = OBJECT_INFO_INIT; unsigned long inflated; + size_t inflated_st = 0; struct commit *commit; struct object *obj; void *content; off_t disk; int eaten; - oi.sizep = &inflated; + oi.sizep = &inflated_st; oi.disk_sizep = &disk; oi.contentp = &content; @@ -798,6 +799,7 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids, OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) < 0) continue; + inflated = cast_size_t_to_ulong(inflated_st); obj = parse_object_buffer(the_repository, &oids->oid[i], type, inflated, content, &eaten); diff --git a/builtin/tag.c b/builtin/tag.c index d51c2e3349..06c125b53c 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -238,7 +238,7 @@ static int git_tag_config(const char *var, const char *value, static void write_tag_body(int fd, const struct object_id *oid) { - unsigned long size; + size_t size; enum object_type type; char *buf, *sp, *orig; struct strbuf payload = STRBUF_INIT; @@ -388,7 +388,7 @@ static void create_reflog_msg(const struct object_id *oid, struct strbuf *sb) enum object_type type; struct commit *c; char *buf; - unsigned long size; + size_t size; int subject_len = 0; const char *subject_start; diff --git a/builtin/unpack-file.c b/builtin/unpack-file.c index 
87877a9fab..387389ed49 100644 --- a/builtin/unpack-file.c +++ b/builtin/unpack-file.c @@ -12,7 +12,7 @@ static char *create_temp_file(struct object_id *oid) static char path[50]; void *buf; enum object_type type; - unsigned long size; + size_t size; int fd; buf = odb_read_object(the_repository->objects, oid, &type, &size); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index e7a50c493c..f3849bb654 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -231,7 +231,7 @@ static int check_object(struct object *obj, enum object_type type, die("object type mismatch"); if (!(obj->flags & FLAG_OPEN)) { - unsigned long size; + size_t size; int type = odb_read_object_info(the_repository->objects, &obj->oid, &size); if (type != obj->type || type <= 0) die("object of unexpected type"); @@ -436,6 +436,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, { void *delta_data, *base; unsigned long base_size; + size_t base_size_st = 0; struct object_id base_oid; if (type == OBJ_REF_DELTA) { @@ -512,7 +513,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, return; base = odb_read_object(the_repository->objects, &base_oid, - &type, &base_size); + &type, &base_size_st); + base_size = cast_size_t_to_ulong(base_size_st); if (!base) { error("failed to read delta-pack base object %s", oid_to_hex(&base_oid)); diff --git a/bundle.c b/bundle.c index 42327f9739..fd2db2c837 100644 --- a/bundle.c +++ b/bundle.c @@ -296,7 +296,7 @@ int list_bundle_refs(struct bundle_header *header, int argc, const char **argv) static int is_tag_in_date_range(struct object *tag, struct rev_info *revs) { - unsigned long size; + size_t size; enum object_type type; char *buf = NULL, *line, *lineend; timestamp_t date; diff --git a/combine-diff.c b/combine-diff.c index b799862068..3ce71db8bb 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -325,7 +325,9 @@ static char *grab_blob(struct repository *r, *size = 
fill_textconv(r, textconv, df, &blob); free_filespec(df); } else { - blob = odb_read_object(r->objects, oid, &type, size); + size_t size_st = 0; + blob = odb_read_object(r->objects, oid, &type, &size_st); + *size = cast_size_t_to_ulong(size_st); if (!blob) die(_("unable to read %s"), oid_to_hex(oid)); if (type != OBJ_BLOB) diff --git a/commit.c b/commit.c index fd8723502e..7950effc58 100644 --- a/commit.c +++ b/commit.c @@ -395,7 +395,7 @@ const void *repo_get_commit_buffer(struct repository *r, const void *ret = get_cached_commit_buffer(r, commit, sizep); if (!ret) { enum object_type type; - unsigned long size; + size_t size; ret = odb_read_object(r->objects, &commit->object.oid, &type, &size); if (!ret) die("cannot read commit object %s", @@ -404,7 +404,7 @@ const void *repo_get_commit_buffer(struct repository *r, die("expected commit for %s, got %s", oid_to_hex(&commit->object.oid), type_name(type)); if (sizep) - *sizep = size; + *sizep = cast_size_t_to_ulong(size); } return ret; } @@ -437,7 +437,7 @@ static inline void set_commit_tree(struct commit *c, struct tree *t) static void load_tree_from_commit_contents(struct repository *r, struct commit *commit) { enum object_type type; - unsigned long size; + size_t size; char *buf; const char *p; struct object_id tree_oid; @@ -604,7 +604,7 @@ int repo_parse_commit_internal(struct repository *r, { enum object_type type; void *buffer; - unsigned long size; + size_t size; struct object_info oi = { .typep = &type, .sizep = &size, @@ -1313,7 +1313,7 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h struct merge_remote_desc *desc; struct commit_extra_header *mergetag; char *buf; - unsigned long size; + size_t size; enum object_type type; struct strbuf payload = STRBUF_INIT; struct strbuf signature = STRBUF_INIT; diff --git a/config.c b/config.c index a1b92fe083..21b231052c 100644 --- a/config.c +++ b/config.c @@ -1442,7 +1442,7 @@ int git_config_from_blob_oid(config_fn_t fn, { enum 
object_type type; char *buf; - unsigned long size; + size_t size; int ret; buf = odb_read_object(repo->objects, oid, &type, &size); diff --git a/diff.c b/diff.c index 5a584fa1d5..816b89dc6c 100644 --- a/diff.c +++ b/diff.c @@ -4594,8 +4594,9 @@ int diff_populate_filespec(struct repository *r, } } else { + size_t size_st = 0; struct object_info info = { - .sizep = &s->size + .sizep = &size_st }; if (!(size_only || check_binary)) @@ -4617,6 +4618,7 @@ int diff_populate_filespec(struct repository *r, die("unable to read %s", oid_to_hex(&s->oid)); object_read: + s->size = cast_size_t_to_ulong(size_st); if (size_only || check_binary) { if (size_only) return 0; @@ -4631,6 +4633,7 @@ object_read: if (odb_read_object_info_extended(r->objects, &s->oid, &info, OBJECT_INFO_LOOKUP_REPLACE)) die("unable to read %s", oid_to_hex(&s->oid)); + s->size = cast_size_t_to_ulong(size_st); } s->should_free = 1; } diff --git a/dir.c b/dir.c index 33c81c256e..b6764d98a7 100644 --- a/dir.c +++ b/dir.c @@ -324,7 +324,7 @@ static int do_read_blob(const struct object_id *oid, struct oid_stat *oid_stat, size_t *size_out, char **data_out) { enum object_type type; - unsigned long sz; + size_t sz; char *data; *size_out = 0; diff --git a/entry.c b/entry.c index 7817aee362..c444fe5a10 100644 --- a/entry.c +++ b/entry.c @@ -92,11 +92,9 @@ static int create_file(const char *path, unsigned int mode) void *read_blob_entry(const struct cache_entry *ce, size_t *size) { enum object_type type; - unsigned long ul; void *blob_data = odb_read_object(the_repository->objects, &ce->oid, - &type, &ul); + &type, size); - *size = ul; if (blob_data) { if (type == OBJ_BLOB) return blob_data; diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c index 45d8b20e97..14441f23ae 100644 --- a/fmt-merge-msg.c +++ b/fmt-merge-msg.c @@ -528,11 +528,11 @@ static void fmt_merge_msg_sigs(struct strbuf *out) for (i = 0; i < origins.nr; i++) { struct object_id *oid = origins.items[i].util; enum object_type type; - unsigned long size; + 
size_t size; char *buf = odb_read_object(the_repository->objects, oid, &type, &size); char *origbuf = buf; - unsigned long len = size; + size_t len = size; struct signature_check sigc = { NULL }; struct strbuf payload = STRBUF_INIT, sig = STRBUF_INIT; diff --git a/fsck.c b/fsck.c index b72200c352..82c2002f4a 100644 --- a/fsck.c +++ b/fsck.c @@ -1328,7 +1328,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done, oidset_iter_init(blobs_found, &iter); while ((oid = oidset_iter_next(&iter))) { enum object_type type; - unsigned long size; + size_t size; char *buf; if (oidset_contains(blobs_done, oid)) diff --git a/grep.c b/grep.c index a54e5d86a9..1d75d31421 100644 --- a/grep.c +++ b/grep.c @@ -1931,9 +1931,11 @@ void grep_source_clear_data(struct grep_source *gs) static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; + size_t size_st = 0; gs->buf = odb_read_object(gs->repo->objects, gs->identifier, - &type, &gs->size); + &type, &size_st); + gs->size = cast_size_t_to_ulong(size_st); if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, diff --git a/http-push.c b/http-push.c index 520d6c3b6a..c61d9f7e02 100644 --- a/http-push.c +++ b/http-push.c @@ -365,7 +365,7 @@ static void start_put(struct transfer_request *request) enum object_type type; char hdr[50]; void *unpacked; - unsigned long len; + size_t len; int hdrlen; ssize_t size; git_zstream stream; diff --git a/list-objects-filter.c b/list-objects-filter.c index 78316e7f90..c912ff3079 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -280,7 +280,7 @@ static enum list_objects_filter_result filter_blobs_limit( void *filter_data_) { struct filter_blobs_limit_data *filter_data = filter_data_; - unsigned long object_length; + size_t object_length; enum object_type t; switch (filter_situation) { diff --git a/mailmap.c b/mailmap.c index 3b2691781d..72b639e602 100644 --- a/mailmap.c +++ b/mailmap.c @@ -186,7 +186,7 @@ int 
read_mailmap_blob(struct repository *repo, struct string_list *map, { struct object_id oid; char *buf; - unsigned long size; + size_t size; enum object_type type; if (!name) diff --git a/match-trees.c b/match-trees.c index 4216933d06..2a43c0fa1a 100644 --- a/match-trees.c +++ b/match-trees.c @@ -61,7 +61,7 @@ static void *fill_tree_desc_strict(struct repository *r, { void *buffer; enum object_type type; - unsigned long size; + size_t size; buffer = odb_read_object(r->objects, hash, &type, &size); if (!buffer) @@ -186,7 +186,7 @@ static int splice_tree(struct repository *r, char *subpath; int toplen; char *buf; - unsigned long sz; + size_t sz; struct tree_desc desc; unsigned char *rewrite_here; const struct object_id *rewrite_with; diff --git a/merge-blobs.c b/merge-blobs.c index 6fc2799417..16a75bd1e3 100644 --- a/merge-blobs.c +++ b/merge-blobs.c @@ -9,7 +9,7 @@ static int fill_mmfile_blob(mmfile_t *f, struct blob *obj) { void *buf; - unsigned long size; + size_t size; enum object_type type; buf = odb_read_object(the_repository->objects, &obj->object.oid, @@ -35,7 +35,7 @@ static void *three_way_filemerge(struct index_state *istate, mmfile_t *base, mmfile_t *our, mmfile_t *their, - unsigned long *size) + size_t *size) { enum ll_merge_result merge_status; mmbuffer_t res; @@ -61,7 +61,7 @@ static void *three_way_filemerge(struct index_state *istate, void *merge_blobs(struct index_state *istate, const char *path, struct blob *base, struct blob *our, - struct blob *their, unsigned long *size) + struct blob *their, size_t *size) { void *res = NULL; mmfile_t f1, f2, common; diff --git a/merge-blobs.h b/merge-blobs.h index 13cf9669e5..5797517a06 100644 --- a/merge-blobs.h +++ b/merge-blobs.h @@ -6,6 +6,6 @@ struct index_state; void *merge_blobs(struct index_state *, const char *, struct blob *, struct blob *, - struct blob *, unsigned long *); + struct blob *, size_t *); #endif /* MERGE_BLOBS_H */ diff --git a/merge-ort.c b/merge-ort.c index 544be9e466..4f6273bd51 100644 
--- a/merge-ort.c +++ b/merge-ort.c @@ -3716,7 +3716,7 @@ static int read_oid_strbuf(struct merge_options *opt, { void *buf; enum object_type type; - unsigned long size; + size_t size; buf = odb_read_object(opt->repo->objects, oid, &type, &size); if (!buf) { path_msg(opt, ERROR_OBJECT_READ_FAILED, 0, diff --git a/notes-cache.c b/notes-cache.c index bf5bb1f6c1..74cef802bd 100644 --- a/notes-cache.c +++ b/notes-cache.c @@ -82,7 +82,7 @@ char *notes_cache_get(struct notes_cache *c, struct object_id *key_oid, const struct object_id *value_oid; enum object_type type; char *value; - unsigned long size; + size_t size; value_oid = get_note(&c->tree, key_oid); if (!value_oid) diff --git a/notes-merge.c b/notes-merge.c index b9322abbcb..118cad2518 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -339,7 +339,7 @@ static void write_note_to_worktree(const struct object_id *obj, const struct object_id *note) { enum object_type type; - unsigned long size; + size_t size; void *buf = odb_read_object(the_repository->objects, note, &type, &size); if (!buf) diff --git a/notes.c b/notes.c index 8f315e2a00..ec9c2cb150 100644 --- a/notes.c +++ b/notes.c @@ -811,7 +811,8 @@ int combine_notes_concatenate(struct object_id *cur_oid, const struct object_id *new_oid) { char *cur_msg = NULL, *new_msg = NULL, *buf; - unsigned long cur_len, new_len, buf_len; + unsigned long buf_len; + size_t cur_len, new_len; enum object_type cur_type, new_type; int ret; @@ -875,7 +876,7 @@ static int string_list_add_note_lines(struct string_list *list, const struct object_id *oid) { char *data; - unsigned long len; + size_t len; enum object_type t; if (is_null_oid(oid)) @@ -1282,7 +1283,8 @@ static void format_note(struct notes_tree *t, const struct object_id *object_oid static const char utf8[] = "utf-8"; const struct object_id *oid; char *msg, *msg_p; - unsigned long linelen, msglen; + unsigned long linelen; + size_t msglen; enum object_type type; if (!t) diff --git a/object-file.c b/object-file.c index 
90f995d000..a81d50c305 100644 --- a/object-file.c +++ b/object-file.c @@ -381,7 +381,7 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) } if (oi->sizep) - *oi->sizep = cast_size_t_to_ulong(size); + *oi->sizep = size; /* * The length must be followed by a zero byte @@ -409,7 +409,7 @@ static int read_object_info_from_path(struct odb_source *source, void *map = NULL; git_zstream stream, *stream_to_end = NULL; char hdr[MAX_HEADER_LEN]; - unsigned long size_scratch; + size_t size_scratch; enum object_type type_scratch; struct stat st; @@ -1222,7 +1222,7 @@ int force_object_loose(struct odb_source *source, { const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; void *buf; - unsigned long len; + size_t len; struct object_info oi = OBJECT_INFO_INIT; struct object_id compat_oid; enum object_type type; @@ -2126,7 +2126,7 @@ int read_loose_object(struct repository *repo, unsigned long mapsize; git_zstream stream; char hdr[MAX_HEADER_LEN]; - unsigned long *size = oi->sizep; + size_t *size = oi->sizep; fd = git_open(path); if (fd >= 0) @@ -2302,7 +2302,6 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; unsigned long mapsize; - unsigned long size_ul; void *mapped; mapped = odb_source_loose_map_object(source, oid, &mapsize); @@ -2326,18 +2325,11 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out, goto error; } - /* - * object_info.sizep is unsigned long* (32-bit on Windows), but - * st->base.size is size_t (64-bit). Use temporary variable. - * Note: loose objects >4GB would still truncate here, but such - * large loose objects are uncommon (they'd normally be packed). 
- */ - oi.sizep = &size_ul; + oi.sizep = &st->base.size; oi.typep = &st->base.type; if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) goto error; - st->base.size = size_ul; st->mapped = mapped; st->mapsize = mapsize; diff --git a/object.c b/object.c index 465902ecc6..23b84aa7e2 100644 --- a/object.c +++ b/object.c @@ -325,7 +325,7 @@ struct object *parse_object_with_flags(struct repository *r, { int skip_hash = !!(flags & PARSE_OBJECT_SKIP_HASH_CHECK); int discard_tree = !!(flags & PARSE_OBJECT_DISCARD_TREE); - unsigned long size; + size_t size; enum object_type type; int eaten; const struct object_id *repl = lookup_replace_object(r, oid); diff --git a/odb.c b/odb.c index 965ef68e4e..7d555be09f 100644 --- a/odb.c +++ b/odb.c @@ -625,7 +625,7 @@ static int oid_object_info_convert(struct repository *r, enum object_type type; struct object_id oid, delta_base_oid; struct object_info new_oi, *oi; - unsigned long size; + size_t size; void *content; int ret; @@ -716,7 +716,7 @@ int odb_read_object_info_extended(struct object_database *odb, /* returns enum object_type or negative */ int odb_read_object_info(struct object_database *odb, const struct object_id *oid, - unsigned long *sizep) + size_t *sizep) { enum object_type type; struct object_info oi = OBJECT_INFO_INIT; @@ -730,7 +730,7 @@ int odb_read_object_info(struct object_database *odb, } int odb_pretend_object(struct object_database *odb, - void *buf, unsigned long len, enum object_type type, + void *buf, size_t len, enum object_type type, struct object_id *oid) { hash_object_file(odb->repo->hash_algo, buf, len, type, oid); @@ -744,7 +744,7 @@ int odb_pretend_object(struct object_database *odb, void *odb_read_object(struct object_database *odb, const struct object_id *oid, enum object_type *type, - unsigned long *size) + size_t *size) { struct object_info oi = OBJECT_INFO_INIT; unsigned flags = OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE; @@ -762,12 +762,12 @@ void *odb_read_object(struct 
object_database *odb, void *odb_read_object_peeled(struct object_database *odb, const struct object_id *oid, enum object_type required_type, - unsigned long *size, + size_t *size, struct object_id *actual_oid_return) { enum object_type type; void *buffer; - unsigned long isize; + size_t isize; struct object_id actual_oid; oidcpy(&actual_oid, oid); diff --git a/odb.h b/odb.h index 73553ed5a7..e2f0bbad25 100644 --- a/odb.h +++ b/odb.h @@ -228,12 +228,12 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb, void *odb_read_object(struct object_database *odb, const struct object_id *oid, enum object_type *type, - unsigned long *size); + size_t *size); void *odb_read_object_peeled(struct object_database *odb, const struct object_id *oid, enum object_type required_type, - unsigned long *size, + size_t *size, struct object_id *oid_ret); /* @@ -245,13 +245,13 @@ void *odb_read_object_peeled(struct object_database *odb, * that reference it. */ int odb_pretend_object(struct object_database *odb, - void *buf, unsigned long len, enum object_type type, + void *buf, size_t len, enum object_type type, struct object_id *oid); struct object_info { /* Request */ enum object_type *typep; - unsigned long *sizep; + size_t *sizep; off_t *disk_sizep; struct object_id *delta_base_oid; void **contentp; @@ -356,7 +356,7 @@ int odb_read_object_info_extended(struct object_database *odb, */ int odb_read_object_info(struct object_database *odb, const struct object_id *oid, - unsigned long *sizep); + size_t *sizep); enum odb_has_object_flags { /* Retry packed storage after checking packed and loose storage */ diff --git a/odb/streaming.c b/odb/streaming.c index 7602a8d5d8..20531e864c 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -157,26 +157,15 @@ static int open_istream_incore(struct odb_read_stream **out, .base.read = read_istream_incore, }; struct odb_incore_read_stream *st; - unsigned long size_ul; int ret; oi.typep = &stream.base.type; - /* - * 
object_info.sizep is unsigned long* (32-bit on Windows), but - * stream.base.size is size_t (64-bit). We use a temporary variable - * because the types are incompatible. Note: this path still truncates - * for >4GB objects, but large objects should use pack streaming - * (packfile_store_read_object_stream) which handles size_t properly. - * This incore fallback is only used for small objects or when pack - * streaming is unavailable. - */ - oi.sizep = &size_ul; + oi.sizep = &stream.base.size; oi.contentp = (void **)&stream.buf; ret = odb_read_object_info_extended(odb, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; - stream.base.size = size_ul; CALLOC_ARRAY(st, 1); *st = stream; diff --git a/pack-bitmap.c b/pack-bitmap.c index f9af8a96bd..e8a82945cc 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1856,7 +1856,7 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, uint32_t pos) { - unsigned long size; + size_t size; struct object_info oi = OBJECT_INFO_INIT; oi.sizep = &size; @@ -1891,7 +1891,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, die(_("unable to get size of %s"), oid_to_hex(&obj->oid)); } - return size; + return cast_size_t_to_ulong(size); } static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, diff --git a/packfile.c b/packfile.c index c174982d10..78c389e6f3 100644 --- a/packfile.c +++ b/packfile.c @@ -1607,13 +1607,10 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off * a "real" type later if the caller is interested. 
*/ if (oi->contentp) { - size_t size_st = 0; *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, - &size_st, &type); + oi->sizep, &type); if (!*oi->contentp) type = OBJ_BAD; - else if (oi->sizep) - *oi->sizep = cast_size_t_to_ulong(size_st); } else if (oi->sizep || oi->typep || oi->delta_base_oid) { type = unpack_object_header(p, &w_curs, &curpos, &size); } @@ -1633,7 +1630,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off goto out; } } - *oi->sizep = (unsigned long)size; + *oi->sizep = size; } if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) { @@ -1919,7 +1916,6 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, struct object_id base_oid; if (!(offset_to_pack_pos(p, obj_offset, &pos))) { struct object_info oi = OBJECT_INFO_INIT; - unsigned long bsz_ul = 0; nth_packed_object_id(&base_oid, p, pack_pos_to_index(p, pos)); @@ -1930,13 +1926,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, mark_bad_packed_object(p, &base_oid); oi.typep = &type; - oi.sizep = &bsz_ul; + oi.sizep = &base_size; oi.contentp = &base; if (odb_read_object_info_extended(r->objects, &base_oid, &oi, 0) < 0) base = NULL; - else - base_size = bsz_ul; external_base = base; } diff --git a/path-walk.c b/path-walk.c index 94ff90bd15..edc8e736d7 100644 --- a/path-walk.c +++ b/path-walk.c @@ -368,7 +368,7 @@ static int walk_path(struct path_walk_context *ctx, struct oid_array filtered = OID_ARRAY_INIT; for (size_t i = 0; i < list->oids.nr; i++) { - unsigned long size; + size_t size; if (odb_read_object_info(ctx->repo->objects, &list->oids.oid[i], diff --git a/protocol-caps.c b/protocol-caps.c index 35072ed60b..8858ea4489 100644 --- a/protocol-caps.c +++ b/protocol-caps.c @@ -50,7 +50,7 @@ static void send_info(struct repository *r, struct packet_writer *writer, for_each_string_list_item (item, oid_str_list) { const char *oid_str = item->string; struct object_id oid; - unsigned long 
object_size; + size_t object_size; if (get_oid_hex_algop(oid_str, &oid, r->hash_algo) < 0) { packet_writer_error( @@ -66,7 +66,8 @@ static void send_info(struct repository *r, struct packet_writer *writer, if (odb_read_object_info(r->objects, &oid, &object_size) < 0) { strbuf_addstr(&send_buffer, " "); } else { - strbuf_addf(&send_buffer, " %lu", object_size); + strbuf_addf(&send_buffer, " %"PRIuMAX, + (uintmax_t)object_size); } } diff --git a/read-cache.c b/read-cache.c index 21829102ae..21ca58beea 100644 --- a/read-cache.c +++ b/read-cache.c @@ -250,7 +250,7 @@ static int ce_compare_link(const struct cache_entry *ce, size_t expected_size) { int match = -1; void *buffer; - unsigned long size; + size_t size; enum object_type type; struct strbuf sb = STRBUF_INIT; @@ -3462,7 +3462,7 @@ void *read_blob_data_from_index(struct index_state *istate, const char *path, unsigned long *size) { int pos, len; - unsigned long sz; + size_t sz; enum object_type type; void *data; @@ -3490,7 +3490,7 @@ void *read_blob_data_from_index(struct index_state *istate, return NULL; } if (size) - *size = sz; + *size = cast_size_t_to_ulong(sz); return data; } diff --git a/ref-filter.c b/ref-filter.c index 1da4c0e60d..8ba91c72a1 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -86,7 +86,7 @@ struct ref_trailer_buf { static struct expand_data { struct object_id oid; enum object_type type; - unsigned long size; + size_t size; off_t disk_size; struct object_id delta_base_oid; void *content; diff --git a/reflog.c b/reflog.c index 82337078d0..04edbe5670 100644 --- a/reflog.c +++ b/reflog.c @@ -154,7 +154,7 @@ static int tree_is_complete(const struct object_id *oid) if (!tree->buffer) { enum object_type type; - unsigned long size; + size_t size; void *data = odb_read_object(the_repository->objects, oid, &type, &size); if (!data) { diff --git a/rerere.c b/rerere.c index 0296700f9f..068321b24f 100644 --- a/rerere.c +++ b/rerere.c @@ -990,7 +990,7 @@ static int handle_cache(struct index_state *istate, 
while (pos < istate->cache_nr) { enum object_type type; - unsigned long size; + size_t size; ce = istate->cache[pos++]; if (ce_namelen(ce) != len || memcmp(ce->name, path, len)) diff --git a/submodule-config.c b/submodule-config.c index a81897b4e0..f75997402a 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -694,7 +694,7 @@ static const struct submodule *config_from(struct submodule_cache *cache, enum lookup_type lookup_type) { struct strbuf rev = STRBUF_INIT; - unsigned long config_size; + size_t config_size; char *config = NULL; struct object_id oid; enum object_type type; diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c index c493b75e02..840797cf0d 100644 --- a/t/helper/test-pack-deltas.c +++ b/t/helper/test-pack-deltas.c @@ -48,7 +48,8 @@ static void write_ref_delta(struct hashfile *f, struct object_id *base) { unsigned char header[MAX_PACK_OBJECT_HEADER]; - unsigned long size, base_size, delta_size, compressed_size, hdrlen; + unsigned long delta_size, compressed_size, hdrlen; + size_t size, base_size; enum object_type type; void *base_buf, *delta_buf; void *buf = odb_read_object(the_repository->objects, diff --git a/t/helper/test-partial-clone.c b/t/helper/test-partial-clone.c index a7aab426d0..87c59108e0 100644 --- a/t/helper/test-partial-clone.c +++ b/t/helper/test-partial-clone.c @@ -17,7 +17,7 @@ static void object_info(const char *gitdir, const char *oid_hex) { struct repository r; struct object_id oid; - unsigned long size; + size_t size; struct object_info oi = {.sizep = &size}; const char *p; diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c index 482502ef4b..6844bfc37c 100644 --- a/t/unit-tests/u-odb-inmemory.c +++ b/t/unit-tests/u-odb-inmemory.c @@ -20,7 +20,7 @@ static void cl_assert_object_info(struct odb_source_inmemory *source, const char *expected_content) { enum object_type actual_type; - unsigned long actual_size; + size_t actual_size; void *actual_content; struct object_info oi = { 
.typep = &actual_type, diff --git a/tag.c b/tag.c index 2f12e51024..1a00ded6eb 100644 --- a/tag.c +++ b/tag.c @@ -49,7 +49,7 @@ int gpg_verify_tag(struct repository *r, const struct object_id *oid, { enum object_type type; char *buf; - unsigned long size; + size_t size; int ret; type = odb_read_object_info(r->objects, oid, NULL); @@ -207,7 +207,7 @@ int parse_tag(struct repository *r, struct tag *item) { enum object_type type; void *data; - unsigned long size; + size_t size; int ret; if (item->object.parsed) diff --git a/tree-walk.c b/tree-walk.c index 7e1b956f27..a67f06b9eb 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -87,7 +87,7 @@ void *fill_tree_descriptor(struct repository *r, struct tree_desc *desc, const struct object_id *oid) { - unsigned long size = 0; + size_t size = 0; void *buf = NULL; if (oid) { @@ -610,7 +610,7 @@ int get_tree_entry(struct repository *r, { int retval; void *tree; - unsigned long size; + size_t size; struct object_id root; tree = odb_read_object_peeled(r->objects, tree_oid, OBJ_TREE, &size, &root); @@ -682,7 +682,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r, if (!t.buffer) { void *tree; struct object_id root; - unsigned long size; + size_t size; tree = odb_read_object_peeled(r->objects, &current_tree_oid, OBJ_TREE, &size, &root); if (!tree) @@ -778,6 +778,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r, } else if (S_ISLNK(*mode)) { /* Follow a symlink */ unsigned long link_len; + size_t link_len_st = 0; size_t len; char *contents, *contents_start; struct dir_state *parent; @@ -797,7 +798,8 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r, contents = odb_read_object(r->objects, &current_tree_oid, &type, - &link_len); + &link_len_st); + link_len = cast_size_t_to_ulong(link_len_st); if (!contents) goto done; diff --git a/tree.c b/tree.c index d703ab97c8..53f7395e9f 100644 --- a/tree.c +++ b/tree.c @@ -188,7 +188,7 @@ int repo_parse_tree_gently(struct repository 
*r, struct tree *item, { enum object_type type; void *buffer; - unsigned long size; + size_t size; if (item->object.parsed) return 0; diff --git a/xdiff-interface.c b/xdiff-interface.c index 5ee2b96d0a..db6938689f 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -179,7 +179,7 @@ int read_mmfile(mmfile_t *ptr, const char *filename) void read_mmblob(mmfile_t *ptr, struct object_database *odb, const struct object_id *oid) { - unsigned long size; + size_t size; enum object_type type; if (is_null_oid(oid)) { -- gitgitgadget ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs 2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget @ 2026-06-08 13:53 ` Patrick Steinhardt 0 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2026-06-08 13:53 UTC (permalink / raw) To: Johannes Schindelin via GitGitGadget Cc: git, Kristofer Karlsson, Johannes Schindelin On Thu, Jun 04, 2026 at 10:51:12AM +0000, Johannes Schindelin via GitGitGadget wrote: > diff --git a/builtin/cat-file.c b/builtin/cat-file.c > index fa45f774d7..fa6e396ddc 100644 > --- a/builtin/cat-file.c > +++ b/builtin/cat-file.c > @@ -120,7 +120,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) > struct object_id oid; > enum object_type type; > char *buf; > - unsigned long size; > + size_t size; > struct object_context obj_context = {0}; > struct object_info oi = OBJECT_INFO_INIT; > unsigned flags = OBJECT_INFO_LOOKUP_REPLACE; > @@ -166,7 +166,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) > if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) { > size_t s = size; > buf = replace_idents_using_mailmap(buf, &s); > - size = cast_size_t_to_ulong(s); > + size = s; > } > > printf("%"PRIuMAX"\n", (uintmax_t)size); Can't we drop this local variable completely and instead supply `&size` directly? 
> @@ -219,7 +225,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) > if (use_mailmap) { > size_t s = size; > buf = replace_idents_using_mailmap(buf, &s); > - size = cast_size_t_to_ulong(s); > + size = s; > } > > /* otherwise just spit out the data */ > @@ -266,7 +272,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) > if (use_mailmap) { > size_t s = size; > buf = replace_idents_using_mailmap(buf, &s); > - size = cast_size_t_to_ulong(s); > + size = s; > } > break; > } > @@ -446,7 +455,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d > if (use_mailmap) { > size_t s = size; > contents = replace_idents_using_mailmap(contents, &s); > - size = cast_size_t_to_ulong(s); > + size = s; > } > > if (type != data->type) Likewise for these three instances. > @@ -555,7 +564,7 @@ static void batch_object_write(const char *obj_name, > if (!buf) > die(_("unable to read %s"), oid_to_hex(&data->oid)); > buf = replace_idents_using_mailmap(buf, &s); > - data->size = cast_size_t_to_ulong(s); > + data->size = s; > > free(buf); > } And I think this site here can be adapted, as well. 
> diff --git a/diff.c b/diff.c > index 5a584fa1d5..816b89dc6c 100644 > --- a/diff.c > +++ b/diff.c > @@ -4594,8 +4594,9 @@ int diff_populate_filespec(struct repository *r, > } > } > else { > + size_t size_st = 0; > struct object_info info = { > - .sizep = &s->size > + .sizep = &size_st > }; > > if (!(size_only || check_binary)) > @@ -4617,6 +4618,7 @@ int diff_populate_filespec(struct repository *r, > die("unable to read %s", oid_to_hex(&s->oid)); > > object_read: > + s->size = cast_size_t_to_ulong(size_st); > if (size_only || check_binary) { > if (size_only) > return 0; > @@ -4631,6 +4633,7 @@ object_read: > if (odb_read_object_info_extended(r->objects, &s->oid, &info, > OBJECT_INFO_LOOKUP_REPLACE)) > die("unable to read %s", oid_to_hex(&s->oid)); > + s->size = cast_size_t_to_ulong(size_st); > } > s->should_free = 1; > } The flow in this function is quite weird if you ask me, but that's a preexisting issue. This does look correct to me, even if it's awkward. Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2026-06-08 13:53 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget 2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt 2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget 2026-06-08 13:53 ` Patrick Steinhardt
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.