From: "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Kristofer Karlsson <krka@spotify.com>,
Johannes Schindelin <johannes.schindelin@gmx.de>,
Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCH 4/7] packfile: widen unpack_entry()'s size out-parameter to size_t
Date: Thu, 04 Jun 2026 10:51:09 +0000 [thread overview]
Message-ID: <bdebc36f21d1e2a13bc91d72a3ada1db3f7e184e.1780570273.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2137.git.1780570272.gitgitgadget@gmail.com>
From: Johannes Schindelin <johannes.schindelin@gmx.de>
The topic `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects paths to `size_t` but deliberately stopped
at the in-memory `unpack_entry()` cascade, which still hands back the
unpacked size through `unsigned long *`. On Windows that boundary
truncates above 4 GiB because that data type is only 32 bits wide on
that platform.
Widen the code path, except where it meets interfaces that are still
narrow. `packed_object_info_with_index_pos()` cannot yet pass
`oi->sizep` directly because the field is still `unsigned long *`;
bridge it with a `size_t` temporary that narrows back, and let a later
commit drop the bridge once the field is wide too. `gfi_unpack_entry()`
keeps its narrow signature because fast-import tracks sizes through
`unsigned long` everywhere it crosses subsystem boundaries; leaving
that signature alone keeps the scope of this commit reasonable.
Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
builtin/fast-import.c | 7 ++++++-
pack-check.c | 5 ++---
packfile.c | 28 +++++++++++++++++-----------
packfile.h | 3 ++-
4 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 82bc6dcc00..3dff898c43 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1239,6 +1239,8 @@ static void *gfi_unpack_entry(
unsigned long *sizep)
{
enum object_type type;
+ size_t size_st = 0;
+ void *data;
struct packed_git *p = all_packs[oe->pack_id];
if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) {
/* The object is stored in the packfile we are writing to
@@ -1260,7 +1262,10 @@ static void *gfi_unpack_entry(
*/
p->pack_size = pack_size + the_hash_algo->rawsz;
}
- return unpack_entry(the_repository, p, oe->idx.offset, &type, sizep);
+ data = unpack_entry(the_repository, p, oe->idx.offset, &type, &size_st);
+ if (sizep)
+ *sizep = cast_size_t_to_ulong(size_st);
+ return data;
}
static void load_tree(struct tree_entry *root)
diff --git a/pack-check.c b/pack-check.c
index 2792f34d25..5adfb3f272 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -143,9 +143,8 @@ static int verify_packfile(struct repository *r,
data = NULL;
data_valid = 0;
} else {
- unsigned long sz;
- data = unpack_entry(r, p, entries[i].offset, &type, &sz);
- size = sz;
+ data = unpack_entry(r, p, entries[i].offset, &type,
+ &size);
data_valid = 1;
}
diff --git a/packfile.c b/packfile.c
index e202f48837..dab0a9b16d 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1454,7 +1454,7 @@ struct delta_base_cache_entry {
struct delta_base_cache_key key;
struct list_head lru;
void *data;
- unsigned long size;
+ size_t size;
enum object_type type;
};
@@ -1525,7 +1525,7 @@ static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
}
static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
- off_t base_offset, unsigned long *base_size,
+ off_t base_offset, size_t *base_size,
enum object_type *type)
{
struct delta_base_cache_entry *ent;
@@ -1558,8 +1558,8 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size,
- unsigned long delta_base_cache_limit,
+ void *base, size_t base_size,
+ size_t delta_base_cache_limit,
enum object_type type)
{
struct delta_base_cache_entry *ent;
@@ -1614,10 +1614,13 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
* a "real" type later if the caller is interested.
*/
if (oi->contentp) {
- *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep,
- &type);
+ size_t size_st = 0;
+ *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
+ &size_st, &type);
if (!*oi->contentp)
type = OBJ_BAD;
+ else if (oi->sizep)
+ *oi->sizep = cast_size_t_to_ulong(size_st);
} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
type = unpack_object_header(p, &w_curs, &curpos, &size);
}
@@ -1735,7 +1738,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset,
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
- unsigned long size)
+ size_t size)
{
int st;
git_zstream stream;
@@ -1790,11 +1793,11 @@ int do_check_packed_object_crc;
struct unpack_entry_stack_ent {
off_t obj_offset;
off_t curpos;
- unsigned long size;
+ size_t size;
};
void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
- enum object_type *final_type, unsigned long *final_size)
+ enum object_type *final_type, size_t *final_size)
{
struct pack_window *w_curs = NULL;
off_t curpos = obj_offset;
@@ -1911,7 +1914,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
void *delta_data;
void *base = data;
void *external_base = NULL;
- unsigned long delta_size, base_size = size;
+ size_t delta_size, base_size = size;
int i;
off_t base_obj_offset = obj_offset;
@@ -1928,6 +1931,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct object_id base_oid;
if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
struct object_info oi = OBJECT_INFO_INIT;
+ unsigned long bsz_ul = 0;
nth_packed_object_id(&base_oid, p,
pack_pos_to_index(p, pos));
@@ -1938,11 +1942,13 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
mark_bad_packed_object(p, &base_oid);
oi.typep = &type;
- oi.sizep = &base_size;
+ oi.sizep = &bsz_ul;
oi.contentp = &base;
if (odb_read_object_info_extended(r->objects, &base_oid,
&oi, 0) < 0)
base = NULL;
+ else
+ base_size = bsz_ul;
external_base = base;
}
diff --git a/packfile.h b/packfile.h
index 49d6bdecf6..0b5ae3f9fc 100644
--- a/packfile.h
+++ b/packfile.h
@@ -455,7 +455,8 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
off_t find_pack_entry_one(const struct object_id *oid, struct packed_git *);
int is_pack_valid(struct packed_git *);
-void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
+void *unpack_entry(struct repository *r, struct packed_git *, off_t,
+ enum object_type *, size_t *);
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
--
gitgitgadget
next prev parent reply other threads:[~2026-06-04 10:51 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-04 10:51 [PATCH 0/7] More work supporting objects larger than 4GB on Windows Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 1/7] compat/msvc: use _chsize_s for ftruncate Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 2/7] patch-delta: use size_t for sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 3/7] pack-objects(check_pack_inflate()): use size_t instead of unsigned long Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` Johannes Schindelin via GitGitGadget [this message]
2026-06-04 10:51 ` [PATCH 5/7] pack-objects: use size_t for in-core object sizes Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers Johannes Schindelin via GitGitGadget
2026-06-04 10:51 ` [PATCH 7/7] odb: use size_t for object_info.sizep and the size APIs Johannes Schindelin via GitGitGadget
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bdebc36f21d1e2a13bc91d72a3ada1db3f7e184e.1780570273.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=johannes.schindelin@gmx.de \
--cc=krka@spotify.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox