From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Kristofer Karlsson <krka@spotify.com>,
Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCH 0/7] More work supporting objects larger than 4GB on Windows
Date: Thu, 04 Jun 2026 10:51:05 +0000 [thread overview]
Message-ID: <pull.2137.git.1780570272.gitgitgadget@gmail.com> (raw)
This patch series tries to address the problems pointed out by the expensive
tests that now run in CI: t5608 and t7508 verify various aspects of handling
objects larger than 4GB, which Git does not currently get right when run on
a platform where size_t is 64-bit but unsigned long is 32-bit (as is the
case on 64-bit Windows).
Unfortunately, this conflicts heavily with ps/odb-source-loose. To make it
easier to resolve the merge conflicts, I rebased the branch onto seen and
pushed the result to:
https://github.com/dscho/git/tree/refs/heads/objects-larger-than-4gb-on-windows-pt2-seen
Here is the relevant range-diff:
1: f3aeae983a ! 1: 62adeb9818 odb: use size_t for object_info.sizep and the size APIs
@@ builtin/log.c: static int show_blob_object(const struct object_id *oid, struct r
## builtin/ls-files.c ##
@@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struct strbuf *line,
- const enum object_type type, unsigned int padded)
- {
+ size_t len;
+
if (type == OBJ_BLOB) {
- unsigned long size;
+ size_t size;
@@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struc
## builtin/ls-tree.c ##
@@ builtin/ls-tree.c: static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
- const enum object_type type, unsigned int padded)
- {
+ size_t len;
+
if (type == OBJ_BLOB) {
- unsigned long size;
+ size_t size;
@@ notes.c: static void format_note(struct notes_tree *t, const struct object_id *o
if (!t)
## object-file.c ##
-@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info *oi)
+@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
}
if (oi->sizep)
@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info
/*
* The length must be followed by a zero byte
-@@ object-file.c: static int read_object_info_from_path(struct odb_source *source,
- void *map = NULL;
- git_zstream stream, *stream_to_end = NULL;
- char hdr[MAX_HEADER_LEN];
-- unsigned long size_scratch;
-+ size_t size_scratch;
- enum object_type type_scratch;
- struct stat st;
-
@@ object-file.c: int force_object_loose(struct odb_source *source,
- {
+ struct odb_source_files *files = odb_source_files_downcast(source);
const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
void *buf;
- unsigned long len;
@@ object-file.c: int read_loose_object(struct repository *repo,
fd = git_open(path);
if (fd >= 0)
-@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
- struct object_info oi = OBJECT_INFO_INIT;
- struct odb_loose_read_stream *st;
- unsigned long mapsize;
-- unsigned long size_ul;
- void *mapped;
-
- mapped = odb_source_loose_map_object(source, oid, &mapsize);
-@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
- goto error;
- }
-
-- /*
-- * object_info.sizep is unsigned long* (32-bit on Windows), but
-- * st->base.size is size_t (64-bit). Use temporary variable.
-- * Note: loose objects >4GB would still truncate here, but such
-- * large loose objects are uncommon (they'd normally be packed).
-- */
-- oi.sizep = &size_ul;
-+ oi.sizep = &st->base.size;
- oi.typep = &st->base.type;
-
- if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
- goto error;
-- st->base.size = size_ul;
-
- st->mapped = mapped;
- st->mapsize = mapsize;
## object.c ##
@@ object.c: struct object *parse_object_with_flags(struct repository *r,
@@ odb.h: int odb_read_object_info_extended(struct object_database *odb,
enum odb_has_object_flags {
/* Retry packed storage after checking packed and loose storage */
+ ## odb/source-loose.c ##
+@@ odb/source-loose.c: static int read_object_info_from_path(struct odb_source_loose *loose,
+ void *map = NULL;
+ git_zstream stream, *stream_to_end = NULL;
+ char hdr[MAX_HEADER_LEN];
+- unsigned long size_scratch;
++ size_t size_scratch;
+ enum object_type type_scratch;
+ struct stat st;
+
+@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+ struct object_info oi = OBJECT_INFO_INIT;
+ struct odb_loose_read_stream *st;
+ unsigned long mapsize;
+- unsigned long size_ul;
+ void *mapped;
+
+ mapped = odb_source_loose_map_object(loose, oid, &mapsize);
+@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+ goto error;
+ }
+
+- /*
+- * object_info.sizep is unsigned long* (32-bit on Windows), but
+- * st->base.size is size_t (64-bit). Use temporary variable.
+- * Note: loose objects >4GB would still truncate here, but such
+- * large loose objects are uncommon (they'd normally be packed).
+- */
+- oi.sizep = &size_ul;
++ oi.sizep = &st->base.size;
+ oi.typep = &st->base.type;
+
+ if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+ goto error;
+- st->base.size = size_ul;
+
+ st->mapped = mapped;
+ st->mapsize = mapsize;
+
## odb/streaming.c ##
@@ odb/streaming.c: static int open_istream_incore(struct odb_read_stream **out,
.base.read = read_istream_incore,
Johannes Schindelin (7):
compat/msvc: use _chsize_s for ftruncate
patch-delta: use size_t for sizes
pack-objects(check_pack_inflate()): use size_t instead of unsigned
long
packfile: widen unpack_entry()'s size out-parameter to size_t
pack-objects: use size_t for in-core object sizes
packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
odb: use size_t for object_info.sizep and the size APIs
apply.c | 8 ++--
archive.c | 4 +-
attr.c | 2 +-
bisect.c | 2 +-
blame.c | 15 +++++--
builtin/cat-file.c | 39 ++++++++++++-------
builtin/difftool.c | 2 +-
builtin/fast-export.c | 7 +++-
builtin/fast-import.c | 29 ++++++++++----
builtin/fsck.c | 2 +-
builtin/grep.c | 12 +++---
builtin/index-pack.c | 10 ++---
builtin/log.c | 2 +-
builtin/ls-files.c | 2 +-
builtin/ls-tree.c | 4 +-
builtin/merge-tree.c | 6 +--
builtin/mktag.c | 2 +-
builtin/notes.c | 6 +--
builtin/pack-objects.c | 73 +++++++++++++++++++++--------------
builtin/repo.c | 4 +-
builtin/tag.c | 4 +-
builtin/unpack-file.c | 2 +-
builtin/unpack-objects.c | 8 ++--
bundle.c | 2 +-
combine-diff.c | 4 +-
commit.c | 10 ++---
compat/msvc-posix.h | 24 +++++++++++-
config.c | 2 +-
delta.h | 20 +++-------
diff.c | 5 ++-
dir.c | 2 +-
entry.c | 4 +-
fmt-merge-msg.c | 4 +-
fsck.c | 2 +-
grep.c | 4 +-
http-push.c | 2 +-
list-objects-filter.c | 2 +-
mailmap.c | 2 +-
match-trees.c | 4 +-
merge-blobs.c | 6 +--
merge-blobs.h | 2 +-
merge-ort.c | 2 +-
notes-cache.c | 2 +-
notes-merge.c | 2 +-
notes.c | 8 ++--
object-file.c | 18 +++------
object.c | 2 +-
odb.c | 12 +++---
odb.h | 10 ++---
odb/streaming.c | 13 +------
pack-bitmap.c | 4 +-
pack-check.c | 5 +--
pack-objects.h | 2 +-
packfile.c | 54 ++++++++++----------------
packfile.h | 5 ++-
patch-delta.c | 8 ++--
path-walk.c | 2 +-
protocol-caps.c | 5 ++-
read-cache.c | 6 +--
ref-filter.c | 2 +-
reflog.c | 2 +-
rerere.c | 2 +-
submodule-config.c | 2 +-
t/helper/test-delta.c | 10 +++--
t/helper/test-pack-deltas.c | 3 +-
t/helper/test-partial-clone.c | 2 +-
t/unit-tests/u-odb-inmemory.c | 2 +-
tag.c | 4 +-
tree-walk.c | 10 +++--
tree.c | 2 +-
xdiff-interface.c | 2 +-
71 files changed, 296 insertions(+), 253 deletions(-)
base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2137%2Fdscho%2Fobjects-larger-than-4gb-on-windows-pt2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2137/dscho/objects-larger-than-4gb-on-windows-pt2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2137
--
gitgitgadget
next reply other threads:[~2026-06-04 10:51 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-04 10:51 Johannes Schindelin via GitGitGadget [this message]
2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=pull.2137.git.1780570272.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=johannes.schindelin@gmx.de \
--cc=krka@spotify.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.