All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7] More work supporting objects larger than 4GB on Windows
@ 2026-06-04 10:51 Johannes Schindelin via GitGitGadget
  2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Johannes Schindelin via GitGitGadget @ 2026-06-04 10:51 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Johannes Schindelin

This patch series tries to address the problems pointed out by the expensive
tests that now run in CI: t5608 and t7508 verify various aspects about
objects larger than 4GB, which Git does not currently handle correctly when
run on a platform where size_t is 64-bit and unsigned long is 32-bit.

Unfortunately, this conflicts heavily with ps/odb-source-loose. I rebased
the branch onto seen and pushed the result to
https://github.com/dscho/git/tree/refs/heads/objects-larger-than-4gb-on-windows-pt2-seen,
to make it easier to resolve merge conflicts. Here is the relevant
range-diff:

1:  f3aeae983a ! 1:  62adeb9818 odb: use size_t for object_info.sizep and the size APIs
    @@ builtin/log.c: static int show_blob_object(const struct object_id *oid, struct r
     
      ## builtin/ls-files.c ##
     @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struct strbuf *line,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ builtin/ls-files.c: static void expand_objectsize(struct repository *repo, struc
     
      ## builtin/ls-tree.c ##
     @@ builtin/ls-tree.c: static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
    - 			      const enum object_type type, unsigned int padded)
    - {
    + 	size_t len;
    + 
      	if (type == OBJ_BLOB) {
     -		unsigned long size;
     +		size_t size;
    @@ notes.c: static void format_note(struct notes_tree *t, const struct object_id *o
      	if (!t)
     
      ## object-file.c ##
    -@@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info *oi)
    +@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
      	}
      
      	if (oi->sizep)
    @@ object-file.c: static int parse_loose_header(const char *hdr, struct object_info
      
      	/*
      	 * The length must be followed by a zero byte
    -@@ object-file.c: static int read_object_info_from_path(struct odb_source *source,
    - 	void *map = NULL;
    - 	git_zstream stream, *stream_to_end = NULL;
    - 	char hdr[MAX_HEADER_LEN];
    --	unsigned long size_scratch;
    -+	size_t size_scratch;
    - 	enum object_type type_scratch;
    - 	struct stat st;
    - 
     @@ object-file.c: int force_object_loose(struct odb_source *source,
    - {
    + 	struct odb_source_files *files = odb_source_files_downcast(source);
      	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
      	void *buf;
     -	unsigned long len;
    @@ object-file.c: int read_loose_object(struct repository *repo,
      
      	fd = git_open(path);
      	if (fd >= 0)
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 	struct object_info oi = OBJECT_INFO_INIT;
    - 	struct odb_loose_read_stream *st;
    - 	unsigned long mapsize;
    --	unsigned long size_ul;
    - 	void *mapped;
    - 
    - 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
    -@@ object-file.c: int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    - 		goto error;
    - 	}
    - 
    --	/*
    --	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    --	 * st->base.size is size_t (64-bit). Use temporary variable.
    --	 * Note: loose objects >4GB would still truncate here, but such
    --	 * large loose objects are uncommon (they'd normally be packed).
    --	 */
    --	oi.sizep = &size_ul;
    -+	oi.sizep = &st->base.size;
    - 	oi.typep = &st->base.type;
    - 
    - 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    - 		goto error;
    --	st->base.size = size_ul;
    - 
    - 	st->mapped = mapped;
    - 	st->mapsize = mapsize;
     
      ## object.c ##
     @@ object.c: struct object *parse_object_with_flags(struct repository *r,
    @@ odb.h: int odb_read_object_info_extended(struct object_database *odb,
      enum odb_has_object_flags {
      	/* Retry packed storage after checking packed and loose storage */
     
    + ## odb/source-loose.c ##
    +@@ odb/source-loose.c: static int read_object_info_from_path(struct odb_source_loose *loose,
    + 	void *map = NULL;
    + 	git_zstream stream, *stream_to_end = NULL;
    + 	char hdr[MAX_HEADER_LEN];
    +-	unsigned long size_scratch;
    ++	size_t size_scratch;
    + 	enum object_type type_scratch;
    + 	struct stat st;
    + 
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 	struct object_info oi = OBJECT_INFO_INIT;
    + 	struct odb_loose_read_stream *st;
    + 	unsigned long mapsize;
    +-	unsigned long size_ul;
    + 	void *mapped;
    + 
    + 	mapped = odb_source_loose_map_object(loose, oid, &mapsize);
    +@@ odb/source-loose.c: static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
    + 		goto error;
    + 	}
    + 
    +-	/*
    +-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
    +-	 * st->base.size is size_t (64-bit). Use temporary variable.
    +-	 * Note: loose objects >4GB would still truncate here, but such
    +-	 * large loose objects are uncommon (they'd normally be packed).
    +-	 */
    +-	oi.sizep = &size_ul;
    ++	oi.sizep = &st->base.size;
    + 	oi.typep = &st->base.type;
    + 
    + 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
    + 		goto error;
    +-	st->base.size = size_ul;
    + 
    + 	st->mapped = mapped;
    + 	st->mapsize = mapsize;
    +
      ## odb/streaming.c ##
     @@ odb/streaming.c: static int open_istream_incore(struct odb_read_stream **out,
      		.base.read = read_istream_incore,


Johannes Schindelin (7):
  compat/msvc: use _chsize_s for ftruncate
  patch-delta: use size_t for sizes
  pack-objects(check_pack_inflate()): use size_t instead of unsigned
    long
  packfile: widen unpack_entry()'s size out-parameter to size_t
  pack-objects: use size_t for in-core object sizes
  packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
  odb: use size_t for object_info.sizep and the size APIs

 apply.c                       |  8 ++--
 archive.c                     |  4 +-
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 +++++--
 builtin/cat-file.c            | 39 ++++++++++++-------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 +++-
 builtin/fast-import.c         | 29 ++++++++++----
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++---
 builtin/index-pack.c          | 10 ++---
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 +-
 builtin/merge-tree.c          |  6 +--
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 +--
 builtin/pack-objects.c        | 73 +++++++++++++++++++++--------------
 builtin/repo.c                |  4 +-
 builtin/tag.c                 |  4 +-
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  8 ++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 +-
 commit.c                      | 10 ++---
 compat/msvc-posix.h           | 24 +++++++++++-
 config.c                      |  2 +-
 delta.h                       | 20 +++-------
 diff.c                        |  5 ++-
 dir.c                         |  2 +-
 entry.c                       |  4 +-
 fmt-merge-msg.c               |  4 +-
 fsck.c                        |  2 +-
 grep.c                        |  4 +-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 +-
 merge-blobs.c                 |  6 +--
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 ++--
 object-file.c                 | 18 +++------
 object.c                      |  2 +-
 odb.c                         | 12 +++---
 odb.h                         | 10 ++---
 odb/streaming.c               | 13 +------
 pack-bitmap.c                 |  4 +-
 pack-check.c                  |  5 +--
 pack-objects.h                |  2 +-
 packfile.c                    | 54 ++++++++++----------------
 packfile.h                    |  5 ++-
 patch-delta.c                 |  8 ++--
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 ++-
 read-cache.c                  |  6 +--
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-delta.c         | 10 +++--
 t/helper/test-pack-deltas.c   |  3 +-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 +-
 tree-walk.c                   | 10 +++--
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 71 files changed, 296 insertions(+), 253 deletions(-)


base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2137%2Fdscho%2Fobjects-larger-than-4gb-on-windows-pt2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2137/dscho/objects-larger-than-4gb-on-windows-pt2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2137
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-06-04 10:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.