From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Philip Oakley <philipoakley@iee.email>,
Patrick Steinhardt <ps@pks.im>,
Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCH v2 0/6] Support hashing objects larger than 4GB on Windows
Date: Tue, 16 Jun 2026 14:49:51 +0000 [thread overview]
Message-ID: <pull.2138.v2.git.1781621398.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2138.git.1780593313.gitgitgadget@gmail.com>
Philip Oakley contributed these patches ~4.5 years ago, and they have
been carried in Git for Windows ever since.
Now that there are already other patch series flying around that try to
address various aspects of >4GB objects (which won't be handled well by Git
until it stops forcing unsigned long to do size_t's job), it seems a good
time to upstream these patches, too, at long last.
Changes since v1:
* Rebased to current master to resolve the conflicts with
ps/odb-source-loose
* Dropped the !LONG_IS_64BIT prereq from the added/touched tests, as it is
no longer needed
Philip Oakley (6):
hash-object: demonstrate a >4GB/LLP64 problem
object-file.c: use size_t for header lengths
hash algorithms: use size_t for section lengths
hash-object --stdin: verify that it works with >4GB/LLP64
hash-object: add another >4GB/LLP64 test case
hash-object: add a >4GB/LLP64 test case using filtered input
object-file.c | 14 +++++++-------
object-file.h | 6 +++---
odb/source-files.c | 2 +-
odb/source-inmemory.c | 2 +-
odb/source-loose.c | 4 ++--
odb/source.h | 2 +-
sha1dc_git.c | 3 +--
sha1dc_git.h | 2 +-
t/t1007-hash-object.sh | 39 +++++++++++++++++++++++++++++++++++++++
9 files changed, 56 insertions(+), 18 deletions(-)
base-commit: 700432b2ba22603a0bcb71475c9c333d17c9b0d1
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2138%2Fdscho%2FPhilipOakley%2Fhashliteral_t-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2138/dscho/PhilipOakley/hashliteral_t-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/2138
Range-diff vs v1:
1: 84e1cd0aa0 = 1: 9c01bac407 hash-object: demonstrate a >4GB/LLP64 problem
2: 809d83e46f ! 2: aa5859c14f object-file.c: use size_t for header lengths
@@ Commit message
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
## object-file.c ##
-@@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
+@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
@@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
@@ object-file.c: static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_c
}
- static void write_object_file_prepare(const struct git_hash_algo *algo,
-- const void *buf, unsigned long len,
-+ const void *buf, size_t len,
- enum object_type type, struct object_id *oid,
-- char *hdr, int *hdrlen)
-+ char *hdr, size_t *hdrlen)
+ void write_object_file_prepare(const struct git_hash_algo *algo,
+- const void *buf, unsigned long len,
++ const void *buf, size_t len,
+ enum object_type type, struct object_id *oid,
+- char *hdr, int *hdrlen)
++ char *hdr, size_t *hdrlen)
{
struct git_hash_ctx c;
@@ object-file.c: out:
write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
}
-@@ object-file.c: cleanup:
+
+ ## object-file.h ##
+@@ object-file.h: int finalize_object_file_flags(struct repository *repo,
+ enum finalize_object_file_flags flags);
+
+ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
+- unsigned long len, enum object_type type,
++ size_t len, enum object_type type,
+ struct object_id *oid);
+ void write_object_file_prepare(const struct git_hash_algo *algo,
+- const void *buf, unsigned long len,
++ const void *buf, size_t len,
+ enum object_type type, struct object_id *oid,
+- char *hdr, int *hdrlen);
++ char *hdr, size_t *hdrlen);
+ int write_loose_object(struct odb_source_loose *loose,
+ const struct object_id *oid, char *hdr,
+ int hdrlen, const void *buf, unsigned long len,
+
+ ## odb/source-files.c ##
+@@ odb/source-files.c: static int odb_source_files_freshen_object(struct odb_source *source,
+ }
+
+ static int odb_source_files_write_object(struct odb_source *source,
+- const void *buf, unsigned long len,
++ const void *buf, size_t len,
+ enum object_type type,
+ struct object_id *oid,
+ struct object_id *compat_oid,
+
+ ## odb/source-inmemory.c ##
+@@ odb/source-inmemory.c: static int odb_source_inmemory_count_objects(struct odb_source *source,
}
- int odb_source_loose_write_object(struct odb_source *source,
-- const void *buf, unsigned long len,
-+ const void *buf, size_t len,
- enum object_type type, struct object_id *oid,
- struct object_id *compat_oid_in,
- enum odb_write_object_flags flags)
-@@ object-file.c: int odb_source_loose_write_object(struct odb_source *source,
+ static int odb_source_inmemory_write_object(struct odb_source *source,
+- const void *buf, unsigned long len,
++ const void *buf, size_t len,
+ enum object_type type,
+ struct object_id *oid,
+ struct object_id *compat_oid UNUSED,
+
+ ## odb/source-loose.c ##
+@@ odb/source-loose.c: static int odb_source_loose_freshen_object(struct odb_source *source,
+ }
+
+ static int odb_source_loose_write_object(struct odb_source *source,
+- const void *buf, unsigned long len,
++ const void *buf, size_t len,
+ enum object_type type, struct object_id *oid,
+ struct object_id *compat_oid_in,
+ enum odb_write_object_flags flags)
+@@ odb/source-loose.c: static int odb_source_loose_write_object(struct odb_source *source,
const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
struct object_id compat_oid;
char hdr[MAX_HEADER_LEN];
@@ object-file.c: int odb_source_loose_write_object(struct odb_source *source,
/* Generate compat_oid */
if (compat) {
- ## object-file.h ##
-@@ object-file.h: int odb_source_loose_freshen_object(struct odb_source *source,
- const struct object_id *oid);
-
- int odb_source_loose_write_object(struct odb_source *source,
-- const void *buf, unsigned long len,
-+ const void *buf, size_t len,
- enum object_type type, struct object_id *oid,
- struct object_id *compat_oid_in,
- enum odb_write_object_flags flags);
-@@ object-file.h: int finalize_object_file_flags(struct repository *repo,
- enum finalize_object_file_flags flags);
-
- void hash_object_file(const struct git_hash_algo *algo, const void *buf,
-- unsigned long len, enum object_type type,
-+ size_t len, enum object_type type,
- struct object_id *oid);
-
- /* Helper to check and "touch" a file */
+ ## odb/source.h ##
+@@ odb/source.h: struct odb_source {
+ * return 0 on success, a negative error code otherwise.
+ */
+ int (*write_object)(struct odb_source *source,
+- const void *buf, unsigned long len,
++ const void *buf, size_t len,
+ enum object_type type,
+ struct object_id *oid,
+ struct object_id *compat_oid,
3: 253d6f8004 ! 3: b401eb490f hash algorithms: use size_t for section lengths
@@ Commit message
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
## object-file.c ##
-@@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
+@@ object-file.c: int parse_loose_header(const char *hdr, struct object_info *oi)
}
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
@@ object-file.c: int odb_source_loose_read_object_info(struct odb_source *source,
struct object_id *oid,
char *hdr, size_t *hdrlen)
{
-@@ object-file.c: static void write_object_file_prepare(const struct git_hash_algo *algo,
+@@ object-file.c: void write_object_file_prepare(const struct git_hash_algo *algo,
/* Generate the header */
*hdrlen = format_object_header(hdr, *hdrlen, type, len);
@@ t/t1007-hash-object.sh: test_expect_success '--stdin outside of repository (uses
'
-test_expect_failure EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
-+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
'files over 4GB hash literally' '
test-tool genzeros $((5*1024*1024*1024)) >big &&
test_oid large5GB >expect &&
4: ba629a3f03 ! 4: 411727336a hash-object --stdin: verify that it works with >4GB/LLP64
@@ Commit message
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
## t/t1007-hash-object.sh ##
-@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
+@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
test_cmp expect actual
'
-+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
+ 'files over 4GB hash correctly via --stdin' '
+ { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
+ test_oid large5GB >expect &&
5: f48d570bba ! 5: e6bb4e6228 hash-object: add another >4GB/LLP64 test case
@@ Commit message
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
## t/t1007-hash-object.sh ##
-@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
+@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
test_cmp expect actual
'
-+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
+ 'files over 4GB hash correctly' '
+ { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
+ test_oid large5GB >expect &&
6: 8a6beeb16d ! 6: 568807ac34 hash-object: add a >4GB/LLP64 test case using filtered input
@@ Commit message
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
## t/t1007-hash-object.sh ##
-@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
+@@ t/t1007-hash-object.sh: test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
test_cmp expect actual
'
+# This clean filter does nothing, other than exercising the interface.
+# We ensure that cleaning doesn't mangle large files on 64-bit Windows.
-+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
++test_expect_success EXPENSIVE,SIZE_T_IS_64BIT \
+ 'hash filtered files over 4GB correctly' '
+ { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } &&
+ test_oid large5GB >expect &&
--
gitgitgadget
next prev parent reply other threads:[~2026-06-16 14:50 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-04 17:15 [PATCH 0/6] Support hashing objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
2026-06-04 17:15 ` [PATCH 1/6] hash-object: demonstrate a >4GB/LLP64 problem Philip Oakley via GitGitGadget
2026-06-04 17:15 ` [PATCH 2/6] object-file.c: use size_t for header lengths Philip Oakley via GitGitGadget
2026-06-15 8:35 ` Patrick Steinhardt
2026-06-16 14:48 ` Johannes Schindelin
2026-06-04 17:15 ` [PATCH 3/6] hash algorithms: use size_t for section lengths Philip Oakley via GitGitGadget
2026-06-15 8:35 ` Patrick Steinhardt
2026-06-16 14:48 ` Johannes Schindelin
2026-06-04 17:15 ` [PATCH 4/6] hash-object --stdin: verify that it works with >4GB/LLP64 Philip Oakley via GitGitGadget
2026-06-15 8:35 ` Patrick Steinhardt
2026-06-04 17:15 ` [PATCH 5/6] hash-object: add another >4GB/LLP64 test case Philip Oakley via GitGitGadget
2026-06-15 8:35 ` Patrick Steinhardt
2026-06-16 14:48 ` Johannes Schindelin
2026-06-04 17:15 ` [PATCH 6/6] hash-object: add a >4GB/LLP64 test case using filtered input Philip Oakley via GitGitGadget
2026-06-04 21:56 ` [PATCH 0/6] Support hashing objects larger than 4GB on Windows Philip Oakley
2026-06-08 23:56 ` Junio C Hamano
2026-06-16 14:49 ` Johannes Schindelin via GitGitGadget [this message]
2026-06-16 14:49 ` [PATCH v2 1/6] hash-object: demonstrate a >4GB/LLP64 problem Philip Oakley via GitGitGadget
2026-06-16 14:49 ` [PATCH v2 2/6] object-file.c: use size_t for header lengths Philip Oakley via GitGitGadget
2026-06-16 14:49 ` [PATCH v2 3/6] hash algorithms: use size_t for section lengths Philip Oakley via GitGitGadget
2026-06-16 14:49 ` [PATCH v2 4/6] hash-object --stdin: verify that it works with >4GB/LLP64 Philip Oakley via GitGitGadget
2026-06-16 14:49 ` [PATCH v2 5/6] hash-object: add another >4GB/LLP64 test case Philip Oakley via GitGitGadget
2026-06-16 14:49 ` [PATCH v2 6/6] hash-object: add a >4GB/LLP64 test case using filtered input Philip Oakley via GitGitGadget
2026-06-16 16:09 ` [PATCH v2 0/6] Support hashing objects larger than 4GB on Windows Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=pull.2138.v2.git.1781621398.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=johannes.schindelin@gmx.de \
--cc=philipoakley@iee.email \
--cc=ps@pks.im \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox