Git development
 help / color / mirror / Atom feed
From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Philip Oakley <philipoakley@iee.email>,
	Patrick Steinhardt <ps@pks.im>,
	Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCH v2 0/6] Support hashing objects larger than 4GB on Windows
Date: Tue, 16 Jun 2026 14:49:51 +0000	[thread overview]
Message-ID: <pull.2138.v2.git.1781621398.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2138.git.1780593313.gitgitgadget@gmail.com>

Philip Oakley contributed these patches ~4.5 years ago, and they have
been carried in Git for Windows ever since.

Now that there are already other patch series flying around that try to
address various aspects of >4GB objects (which Git will not handle well
until it stops forcing unsigned long to do size_t's job), it seems like a
good time to upstream these patches, too, at long last.

Changes since v1:

 * Rebased to current master to resolve the conflicts with
   ps/odb-source-loose
 * Dropped the !LONG_IS_64BIT prereq from the added/touched tests, as it is
   now no longer needed

Philip Oakley (6):
  hash-object: demonstrate a >4GB/LLP64 problem
  object-file.c: use size_t for header lengths
  hash algorithms: use size_t for section lengths
  hash-object --stdin: verify that it works with >4GB/LLP64
  hash-object: add another >4GB/LLP64 test case
  hash-object: add a >4GB/LLP64 test case using filtered input

 object-file.c          | 14 +++++++-------
 object-file.h          |  6 +++---
 odb/source-files.c     |  2 +-
 odb/source-inmemory.c  |  2 +-
 odb/source-loose.c     |  4 ++--
 odb/source.h           |  2 +-
 sha1dc_git.c           |  3 +--
 sha1dc_git.h           |  2 +-
 t/t1007-hash-object.sh | 39 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 56 insertions(+), 18 deletions(-)


base-commit: 700432b2ba22603a0bcb71475c9c333d17c9b0d1
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2138%2Fdscho%2FPhilipOakley%2Fhashliteral_t-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2138/dscho/PhilipOakley/hashliteral_t-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/2138

Range-diff vs v1:

 1:  84e1cd0aa0 = 1:  9c01bac407 hash-object: demonstrate a >4GB/LLP64 problem
 2:  809d83e46f ! 2:  aa5859c14f object-file.c: use size_t for header lengths
     @@ Commit message
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
       ## object-file.c ##
     -@@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
     +@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
       static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
       			     const void *buf, unsigned long len,
       			     struct object_id *oid,
     @@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
      @@ object-file.c: static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_c
       }
       
     - static void write_object_file_prepare(const struct git_hash_algo *algo,
     --				      const void *buf, unsigned long len,
     -+				      const void *buf, size_t len,
     - 				      enum object_type type, struct object_id *oid,
     --				      char *hdr, int *hdrlen)
     -+				      char *hdr, size_t *hdrlen)
     + void write_object_file_prepare(const struct git_hash_algo *algo,
     +-			       const void *buf, unsigned long len,
     ++			       const void *buf, size_t len,
     + 			       enum object_type type, struct object_id *oid,
     +-			       char *hdr, int *hdrlen)
     ++			       char *hdr, size_t *hdrlen)
       {
       	struct git_hash_ctx c;
       
     @@ object-file.c: out:
       
       	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
       }
     -@@ object-file.c: cleanup:
     +
     + ## object-file.h ##
     +@@ object-file.h: int finalize_object_file_flags(struct repository *repo,
     + 			       enum finalize_object_file_flags flags);
     + 
     + void hash_object_file(const struct git_hash_algo *algo, const void *buf,
     +-		      unsigned long len, enum object_type type,
     ++		      size_t len, enum object_type type,
     + 		      struct object_id *oid);
     + void write_object_file_prepare(const struct git_hash_algo *algo,
     +-			       const void *buf, unsigned long len,
     ++			       const void *buf, size_t len,
     + 			       enum object_type type, struct object_id *oid,
     +-			       char *hdr, int *hdrlen);
     ++			       char *hdr, size_t *hdrlen);
     + int write_loose_object(struct odb_source_loose *loose,
     + 		       const struct object_id *oid, char *hdr,
     + 		       int hdrlen, const void *buf, unsigned long len,
     +
     + ## odb/source-files.c ##
     +@@ odb/source-files.c: static int odb_source_files_freshen_object(struct odb_source *source,
     + }
     + 
     + static int odb_source_files_write_object(struct odb_source *source,
     +-					 const void *buf, unsigned long len,
     ++					 const void *buf, size_t len,
     + 					 enum object_type type,
     + 					 struct object_id *oid,
     + 					 struct object_id *compat_oid,
     +
     + ## odb/source-inmemory.c ##
     +@@ odb/source-inmemory.c: static int odb_source_inmemory_count_objects(struct odb_source *source,
       }
       
     - int odb_source_loose_write_object(struct odb_source *source,
     --				  const void *buf, unsigned long len,
     -+				  const void *buf, size_t len,
     - 				  enum object_type type, struct object_id *oid,
     - 				  struct object_id *compat_oid_in,
     - 				  enum odb_write_object_flags flags)
     -@@ object-file.c: int odb_source_loose_write_object(struct odb_source *source,
     + static int odb_source_inmemory_write_object(struct odb_source *source,
     +-					    const void *buf, unsigned long len,
     ++					    const void *buf, size_t len,
     + 					    enum object_type type,
     + 					    struct object_id *oid,
     + 					    struct object_id *compat_oid UNUSED,
     +
     + ## odb/source-loose.c ##
     +@@ odb/source-loose.c: static int odb_source_loose_freshen_object(struct odb_source *source,
     + }
     + 
     + static int odb_source_loose_write_object(struct odb_source *source,
     +-					 const void *buf, unsigned long len,
     ++					 const void *buf, size_t len,
     + 					 enum object_type type, struct object_id *oid,
     + 					 struct object_id *compat_oid_in,
     + 					 enum odb_write_object_flags flags)
     +@@ odb/source-loose.c: static int odb_source_loose_write_object(struct odb_source *source,
       	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
       	struct object_id compat_oid;
       	char hdr[MAX_HEADER_LEN];
     @@ object-file.c: int odb_source_loose_write_object(struct odb_source *source,
       	/* Generate compat_oid */
       	if (compat) {
      
     - ## object-file.h ##
     -@@ object-file.h: int odb_source_loose_freshen_object(struct odb_source *source,
     - 				    const struct object_id *oid);
     - 
     - int odb_source_loose_write_object(struct odb_source *source,
     --				  const void *buf, unsigned long len,
     -+				  const void *buf, size_t len,
     - 				  enum object_type type, struct object_id *oid,
     - 				  struct object_id *compat_oid_in,
     - 				  enum odb_write_object_flags flags);
     -@@ object-file.h: int finalize_object_file_flags(struct repository *repo,
     - 			       enum finalize_object_file_flags flags);
     - 
     - void hash_object_file(const struct git_hash_algo *algo, const void *buf,
     --		      unsigned long len, enum object_type type,
     -+		      size_t len, enum object_type type,
     - 		      struct object_id *oid);
     - 
     - /* Helper to check and "touch" a file */
     + ## odb/source.h ##
     +@@ odb/source.h: struct odb_source {
     + 	 * return 0 on success, a negative error code otherwise.
     + 	 */
     + 	int (*write_object)(struct odb_source *source,
     +-			    const void *buf, unsigned long len,
     ++			    const void *buf, size_t len,
     + 			    enum object_type type,
     + 			    struct object_id *oid,
     + 			    struct object_id *compat_oid,
 3:  253d6f8004 ! 3:  b401eb490f hash algorithms: use size_t for section lengths
     @@ Commit message
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
       ## object-file.c ##
     -@@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
     +@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
       }
       
       static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
     @@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
       			     struct object_id *oid,
       			     char *hdr, size_t *hdrlen)
       {
     -@@ object-file.c: static void write_object_file_prepare(const struct git_hash_algo *algo,
     +@@ object-file.c: void write_object_file_prepare(const struct git_hash_algo *algo,
       	/* Generate the header */
       	*hdrlen = format_object_header(hdr, *hdrlen, type, len);
       
     @@ t/t1007-hash-object.sh: test_expect_success '--stdin outside of repository (uses
       '
       
      -test_expect_failure EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     -+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     ++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
       		'files over 4GB hash literally' '
       	test-tool genzeros $((5*1024*1024*1024)) >big &&
       	test_oid large5GB >expect &&
 4:  ba629a3f03 ! 4:  411727336a hash-object --stdin: verify that it works with >4GB/LLP64
     @@ Commit message
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
       ## t/t1007-hash-object.sh ##
     -@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     +@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
       	test_cmp expect actual
       '
       
     -+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     ++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
      +		'files over 4GB hash correctly via --stdin' '
      +	{ test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
      +	test_oid large5GB >expect &&
 5:  f48d570bba ! 5:  e6bb4e6228 hash-object: add another >4GB/LLP64 test case
     @@ Commit message
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
       ## t/t1007-hash-object.sh ##
     -@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     +@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
       	test_cmp expect actual
       '
       
     -+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     ++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
      +		'files over 4GB hash correctly' '
      +	{ test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
      +	test_oid large5GB >expect &&
 6:  8a6beeb16d ! 6:  568807ac34 hash-object: add a >4GB/LLP64 test case using filtered input
     @@ Commit message
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
       ## t/t1007-hash-object.sh ##
     -@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     +@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
       	test_cmp expect actual
       '
       
      +# This clean filter does nothing, other than exercising the interface.
      +# We ensure that cleaning doesn't mangle large files on 64-bit Windows.
     -+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
     ++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
      +		'hash filtered files over 4GB correctly' '
      +	{ test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
      +	test_oid large5GB >expect &&

-- 
gitgitgadget

  parent reply	other threads:[~2026-06-16 14:50 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 17:15 [PATCH 0/6] Support hashing objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
2026-06-04 17:15 ` [PATCH 1/6] hash-object: demonstrate a >4GB/LLP64 problem Philip Oakley via GitGitGadget
2026-06-04 17:15 ` [PATCH 2/6] object-file.c: use size_t for header lengths Philip Oakley via GitGitGadget
2026-06-15  8:35   ` Patrick Steinhardt
2026-06-16 14:48     ` Johannes Schindelin
2026-06-04 17:15 ` [PATCH 3/6] hash algorithms: use size_t for section lengths Philip Oakley via GitGitGadget
2026-06-15  8:35   ` Patrick Steinhardt
2026-06-16 14:48     ` Johannes Schindelin
2026-06-04 17:15 ` [PATCH 4/6] hash-object --stdin: verify that it works with >4GB/LLP64 Philip Oakley via GitGitGadget
2026-06-15  8:35   ` Patrick Steinhardt
2026-06-04 17:15 ` [PATCH 5/6] hash-object: add another >4GB/LLP64 test case Philip Oakley via GitGitGadget
2026-06-15  8:35   ` Patrick Steinhardt
2026-06-16 14:48     ` Johannes Schindelin
2026-06-04 17:15 ` [PATCH 6/6] hash-object: add a >4GB/LLP64 test case using filtered input Philip Oakley via GitGitGadget
2026-06-04 21:56 ` [PATCH 0/6] Support hashing objects larger than 4GB on Windows Philip Oakley
2026-06-08 23:56   ` Junio C Hamano
2026-06-16 14:49 ` Johannes Schindelin via GitGitGadget [this message]
2026-06-16 14:49   ` [PATCH v2 1/6] hash-object: demonstrate a >4GB/LLP64 problem Philip Oakley via GitGitGadget
2026-06-16 14:49   ` [PATCH v2 2/6] object-file.c: use size_t for header lengths Philip Oakley via GitGitGadget
2026-06-16 14:49   ` [PATCH v2 3/6] hash algorithms: use size_t for section lengths Philip Oakley via GitGitGadget
2026-06-16 14:49   ` [PATCH v2 4/6] hash-object --stdin: verify that it works with >4GB/LLP64 Philip Oakley via GitGitGadget
2026-06-16 14:49   ` [PATCH v2 5/6] hash-object: add another >4GB/LLP64 test case Philip Oakley via GitGitGadget
2026-06-16 14:49   ` [PATCH v2 6/6] hash-object: add a >4GB/LLP64 test case using filtered input Philip Oakley via GitGitGadget
2026-06-16 16:09   ` [PATCH v2 0/6] Support hashing objects larger than 4GB on Windows Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.2138.v2.git.1781621398.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=philipoakley@iee.email \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox