Git development
 help / color / mirror / Atom feed
From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Kristofer Karlsson <krka@spotify.com>,
	Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCH 0/7] More work supporting objects larger than 4GB on Windows
Date: Thu, 04 Jun 2026 10:51:05 +0000	[thread overview]
Message-ID: <pull.2137.git.1780570272.gitgitgadget@gmail.com> (raw)

This patch series tries to address the problems pointed out by the expensive
tests that now run in CI: t5608 and t7508 verify various aspects about
objects larger than 4GB, which Git does not currently handle correctly when
run on a platform where size_t is 64-bit and unsigned long is 32-bit.

Unfortunately, this conflicts heavily with ps/odb-source-loose. I rebased
the branch onto seen and pushed the result to
https://github.com/dscho/git/tree/refs/heads/objects-larger-than-4gb-on-windows-pt2-seen,
to make it easier to resolve merge conflicts. Here is the relevant
range-diff:

1:  f3aeae983a ! 1:  62adeb9818 odb: use size_t for object_info.sizep and the size APIs
    @@ builtin/log.c: static int show_blob_object(const struct object_id *oid, struct r
     
      ## builtin/ls-files.c ##
     @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struct strbuf *line,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struc
     
      ## builtin/ls-tree.c ##
     @@ builtin/ls-tree.c: static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ notes.c: static void format_note(struct notes_tree *t, const struct object_id *o
      	if (!t)
     
      ## object-file.c ##
    -@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info *oi)
    +@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
      	}
      
      	if (oi->sizep)
    @@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info
      
      	/*
      	 * The length must be followed by a zero byte
    -@@ object-file.c: static int read_object_info_from_path(struct odb_source *source,
    - 	void *map = NULL;
    - 	git_zstream stream, *stream_to_end = NULL;
    - 	char hdr[MAX_HEADER_LEN];
    --	unsigned long size_scratch;
    -+	size_t size_scratch;
    - 	enum object_type type_scratch;
    - 	struct stat st;
    - 
     @@ object-file.c: int force_object_loose(struct odb_source *source,
    - {
    + 	struct odb_source_files *files = odb_source_files_downcast(source);
      	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
      	void *buf;
     -	unsigned long len;
    @@ object-file.c: int read_loose_object(struct repository *repo,
      
      	fd = git_open(path);
      	if (fd >= 0)
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 	struct object_info oi = OBJECT_INFO_INIT;
    - 	struct odb_loose_read_stream *st;
    - 	unsigned long mapsize;
    --	unsigned long size_ul;
    - 	void *mapped;
    - 
    - 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 		goto error;
    - 	}
    - 
    --	/*
    --	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    --	 * st->base.size is size_t (64-bit). Use temporary variable.
    --	 * Note: loose objects >4GB would still truncate here, but such
    --	 * large loose objects are uncommon (they'd normally be packed).
    --	 */
    --	oi.sizep = &size_ul;
    -+	oi.sizep = &st->base.size;
    - 	oi.typep = &st->base.type;
    - 
    - 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    - 		goto error;
    --	st->base.size = size_ul;
    - 
    - 	st->mapped = mapped;
    - 	st->mapsize = mapsize;
     
      ## object.c ##
     @@ object.c: struct object *parse_object_with_flags(struct repository *r,
    @@ odb.h: int odb_read_object_info_extended(struct object_database *odb,
      enum odb_has_object_flags {
      	/* Retry packed storage after checking packed and loose storage */
     
    + ## odb/source-loose.c ##
    +@@ odb/source-loose.c: static int read_object_info_from_path(struct odb_source_loose *loose,
    + 	void *map = NULL;
    + 	git_zstream stream, *stream_to_end = NULL;
    + 	char hdr[MAX_HEADER_LEN];
    +-	unsigned long size_scratch;
    ++	size_t size_scratch;
    + 	enum object_type type_scratch;
    + 	struct stat st;
    + 
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 	struct object_info oi = OBJECT_INFO_INIT;
    + 	struct odb_loose_read_stream *st;
    + 	unsigned long mapsize;
    +-	unsigned long size_ul;
    + 	void *mapped;
    + 
    + 	mapped = odb_source_loose_map_object(loose, oid, &mapsize);
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 		goto error;
    + 	}
    + 
    +-	/*
    +-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    +-	 * st->base.size is size_t (64-bit). Use temporary variable.
    +-	 * Note: loose objects >4GB would still truncate here, but such
    +-	 * large loose objects are uncommon (they'd normally be packed).
    +-	 */
    +-	oi.sizep = &size_ul;
    ++	oi.sizep = &st->base.size;
    + 	oi.typep = &st->base.type;
    + 
    + 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    + 		goto error;
    +-	st->base.size = size_ul;
    + 
    + 	st->mapped = mapped;
    + 	st->mapsize = mapsize;
    +
      ## odb/streaming.c ##
     @@ odb/streaming.c: static int open_istream_incore(struct odb_read_stream **out,
      		.base.read = read_istream_incore,


Johannes Schindelin (7):
  compat/msvc: use _chsize_s for ftruncate
  patch-delta: use size_t for sizes
  pack-objects(check_pack_inflate()): use size_t instead of unsigned
    long
  packfile: widen unpack_entry()'s size out-parameter to size_t
  pack-objects: use size_t for in-core object sizes
  packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
  odb: use size_t for object_info.sizep and the size APIs

 apply.c                       |  8 ++--
 archive.c                     |  4 +-
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 +++++--
 builtin/cat-file.c            | 39 ++++++++++++-------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 +++-
 builtin/fast-import.c         | 29 ++++++++++----
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++---
 builtin/index-pack.c          | 10 ++---
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 +-
 builtin/merge-tree.c          |  6 +--
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 +--
 builtin/pack-objects.c        | 73 +++++++++++++++++++++--------------
 builtin/repo.c                |  4 +-
 builtin/tag.c                 |  4 +-
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  8 ++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 +-
 commit.c                      | 10 ++---
 compat/msvc-posix.h           | 24 +++++++++++-
 config.c                      |  2 +-
 delta.h                       | 20 +++-------
 diff.c                        |  5 ++-
 dir.c                         |  2 +-
 entry.c                       |  4 +-
 fmt-merge-msg.c               |  4 +-
 fsck.c                        |  2 +-
 grep.c                        |  4 +-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 +-
 merge-blobs.c                 |  6 +--
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 ++--
 object-file.c                 | 18 +++------
 object.c                      |  2 +-
 odb.c                         | 12 +++---
 odb.h                         | 10 ++---
 odb/streaming.c               | 13 +------
 pack-bitmap.c                 |  4 +-
 pack-check.c                  |  5 +--
 pack-objects.h                |  2 +-
 packfile.c                    | 54 ++++++++++----------------
 packfile.h                    |  5 ++-
 patch-delta.c                 |  8 ++--
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 ++-
 read-cache.c                  |  6 +--
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-delta.c         | 10 +++--
 t/helper/test-pack-deltas.c   |  3 +-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 +-
 tree-walk.c                   | 10 +++--
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 71 files changed, 296 insertions(+), 253 deletions(-)


base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2137%2Fdscho%2Fobjects-larger-than-4gb-on-windows-pt2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2137/dscho/objects-larger-than-4gb-on-windows-pt2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2137
-- 
gitgitgadget

             reply	other threads:[~2026-06-04 10:51 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 10:51 Johannes Schindelin via GitGitGadget [this message]
2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.2137.git.1780570272.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=krka@spotify.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox