Git development

Git development
 help / color / mirror / Atom feed

* [PATCH v2 6/7] packfile,delta: drop the `cast_size_t_to_ulong()` wrappers
From: Johannes Schindelin via GitGitGadget @ 2026-06-15 11:52 UTC (permalink / raw)
  To: git
  Cc: Kristofer Karlsson, Patrick Steinhardt, Johannes Schindelin,
	Johannes Schindelin
In-Reply-To: <pull.2137.v2.git.1781524349.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When I started the transition from `unsigned long` to `size_t`, in the
interest of keeping the patches reviewable, I introduced
`cast_size_t_to_ulong()` calls to prevent data type narrowing from
silently failing to handle large object sizes. I also introduced
`*_sz()` variants next to the existing functions, which allowed most of
the callers to keep using that `unsigned long` that the 90s kindly asked
to be returned.

After the preceding commits, the only places that called the narrow
wrappers either no longer exist or already use the `_sz` form
internally, so the wrappers just narrow values back through
`cast_size_t_to_ulong()` for no reason.

Drop them and rename the `_sz` variants back to the natural names.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 delta.h       | 14 ++------------
 packfile.c    | 28 ++++++++--------------------
 packfile.h    |  2 +-
 patch-delta.c |  4 ++--
 4 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/delta.h b/delta.h
index bb149dc82b..eb5c6d2fdb 100644
--- a/delta.h
+++ b/delta.h
@@ -86,11 +86,8 @@ void *patch_delta(const void *src_buf, size_t src_size,
  * This must be called twice on the delta data buffer, first to get the
  * expected source buffer size, and again to get the target buffer size.
  */
-/*
- * Size_t variant that doesn't truncate - use for >4GB objects on Windows.
- */
-static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
-					   const unsigned char *top)
+static inline size_t get_delta_hdr_size(const unsigned char **datap,
+					const unsigned char *top)
 {
 	const unsigned char *data = *datap;
 	size_t cmd, size = 0;
@@ -104,11 +101,4 @@ static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
 	return size;
 }
 
-static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
-					       const unsigned char *top)
-{
-	size_t size = get_delta_hdr_size_sz(datap, top);
-	return cast_size_t_to_ulong(size);
-}
-
 #endif
diff --git a/packfile.c b/packfile.c
index dab0a9b16d..c174982d10 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1164,11 +1164,12 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
 }
 
 /*
- * Size_t variant for >4GB delta results on Windows.
+ * Read a delta object's header at curpos in p (already inflated as needed)
+ * and return the size of the result object (the post-application target).
  */
-static size_t get_size_from_delta_sz(struct packed_git *p,
-				     struct pack_window **w_curs,
-				     off_t curpos)
+size_t get_size_from_delta(struct packed_git *p,
+			   struct pack_window **w_curs,
+			   off_t curpos)
 {
 	const unsigned char *data;
 	unsigned char delta_head[20], *in;
@@ -1215,18 +1216,10 @@ static size_t get_size_from_delta_sz(struct packed_git *p,
 	data = delta_head;
 
 	/* ignore base size */
-	get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
+	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 
 	/* Read the result size */
-	return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
-}
-
-unsigned long get_size_from_delta(struct packed_git *p,
-				  struct pack_window **w_curs,
-				  off_t curpos)
-{
-	size_t size = get_size_from_delta_sz(p, w_curs, curpos);
-	return cast_size_t_to_ulong(size);
+	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 }
 
 int unpack_object_header(struct packed_git *p,
@@ -1634,12 +1627,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 				ret = -1;
 				goto out;
 			}
-			/*
-			 * Use size_t variant to avoid die() on >4GB deltas.
-			 * oi->sizep is unsigned long, so truncation may occur,
-			 * but streaming code uses its own size_t tracking.
-			 */
-			size = get_size_from_delta_sz(p, &w_curs, tmp_pos);
+			size = get_size_from_delta(p, &w_curs, tmp_pos);
 			if (size == 0) {
 				ret = -1;
 				goto out;
diff --git a/packfile.h b/packfile.h
index 0b5ae3f9fc..bd4494906d 100644
--- a/packfile.h
+++ b/packfile.h
@@ -458,7 +458,7 @@ int is_pack_valid(struct packed_git *);
 void *unpack_entry(struct repository *r, struct packed_git *, off_t,
 		   enum object_type *, size_t *);
 unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, size_t *sizep);
-unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
+size_t get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, size_t *);
 off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
 		     off_t *curpos, enum object_type type,
diff --git a/patch-delta.c b/patch-delta.c
index 44cda97994..42199fa956 100644
--- a/patch-delta.c
+++ b/patch-delta.c
@@ -27,12 +27,12 @@ void *patch_delta(const void *src_buf, size_t src_size,
 	top = (const unsigned char *) delta_buf + delta_size;
 
 	/* make sure the orig file size matches what we expect */
-	size = get_delta_hdr_size_sz(&data, top);
+	size = get_delta_hdr_size(&data, top);
 	if (size != src_size)
 		return NULL;
 
 	/* now the result size */
-	size = get_delta_hdr_size_sz(&data, top);
+	size = get_delta_hdr_size(&data, top);
 	dst_buf = xmallocz(size);
 
 	out = dst_buf;
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH v2 7/7] odb: use size_t for object_info.sizep and the size APIs
From: Johannes Schindelin via GitGitGadget @ 2026-06-15 11:52 UTC (permalink / raw)
  To: git
  Cc: Kristofer Karlsson, Patrick Steinhardt, Johannes Schindelin,
	Johannes Schindelin
In-Reply-To: <pull.2137.v2.git.1781524349.gitgitgadget@gmail.com>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

When `js/objects-larger-than-4gb-on-windows` widened the streaming,
index-pack and unpack-objects code paths, in the interest of keeping the
patches somewhat reasonably-sized, it left the public ODB API still
typed in `unsigned long`. In particular `struct object_info::sizep` and
the four wrappers built on top of it (`odb_read_object`,
`odb_read_object_peeled`, `odb_read_object_info`, `odb_pretend_object`)
still return the unpacked size through `unsigned long *`, so on Windows
`cat-file -s` and the `git add` / `git status` paths for a >4 GiB blob
silently cap at 4 GiB.

Widen the field and the four wrappers. The previous commits already
widened the `unpack_entry()` cascade and pack-objects' in-core size
accessors, so most of the cascade arrives here with no further work: the
temporary shims in `packed_object_info_with_index_pos()` and in
`unpack_entry()`'s delta-base recovery path go away, the two
`SET_SIZE(entry, cast_size_t_to_ulong(canonical_size))` calls in
`check_object()` and the matching one in `drop_reused_delta()` collapse
to plain `SET_SIZE`, and `oe_get_size_slow()`'s tail
`cast_size_t_to_ulong()` is gone too.

What remains narrow are the boundaries this series does not
intend to touch: the diff, blame, textconv and fast-import machinery.

Even so, this patch is unfortunately quite large.

Assisted-by: Opus 4.7
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 apply.c                       |  6 ++--
 archive.c                     |  4 +--
 attr.c                        |  2 +-
 bisect.c                      |  2 +-
 blame.c                       | 15 ++++++---
 builtin/cat-file.c            | 61 ++++++++++++++++-------------------
 builtin/difftool.c            |  2 +-
 builtin/fast-export.c         |  7 ++--
 builtin/fast-import.c         | 22 +++++++++----
 builtin/fsck.c                |  2 +-
 builtin/grep.c                | 12 +++----
 builtin/index-pack.c          |  6 ++--
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            |  2 +-
 builtin/ls-tree.c             |  4 +--
 builtin/merge-tree.c          |  6 ++--
 builtin/mktag.c               |  2 +-
 builtin/notes.c               |  6 ++--
 builtin/pack-objects.c        | 33 +++++++++++++------
 builtin/repo.c                |  4 ++-
 builtin/tag.c                 |  4 +--
 builtin/unpack-file.c         |  2 +-
 builtin/unpack-objects.c      |  6 ++--
 bundle.c                      |  2 +-
 combine-diff.c                |  4 ++-
 commit.c                      | 10 +++---
 config.c                      |  2 +-
 diff.c                        |  5 ++-
 dir.c                         |  2 +-
 entry.c                       |  4 +--
 fmt-merge-msg.c               |  4 +--
 fsck.c                        |  2 +-
 grep.c                        |  4 ++-
 http-push.c                   |  2 +-
 list-objects-filter.c         |  2 +-
 mailmap.c                     |  2 +-
 match-trees.c                 |  4 +--
 merge-blobs.c                 |  6 ++--
 merge-blobs.h                 |  2 +-
 merge-ort.c                   |  2 +-
 notes-cache.c                 |  2 +-
 notes-merge.c                 |  2 +-
 notes.c                       |  8 +++--
 object-file.c                 |  6 ++--
 object.c                      |  2 +-
 odb.c                         | 12 +++----
 odb.h                         | 10 +++---
 odb/source-loose.c            | 12 ++-----
 odb/streaming.c               | 13 +-------
 pack-bitmap.c                 |  4 +--
 packfile.c                    | 12 ++-----
 path-walk.c                   |  2 +-
 protocol-caps.c               |  5 +--
 read-cache.c                  |  6 ++--
 ref-filter.c                  |  2 +-
 reflog.c                      |  2 +-
 rerere.c                      |  2 +-
 submodule-config.c            |  2 +-
 t/helper/test-pack-deltas.c   |  3 +-
 t/helper/test-partial-clone.c |  2 +-
 t/unit-tests/u-odb-inmemory.c |  2 +-
 tag.c                         |  4 +--
 tree-walk.c                   | 10 +++---
 tree.c                        |  2 +-
 xdiff-interface.c             |  2 +-
 65 files changed, 209 insertions(+), 191 deletions(-)

diff --git a/apply.c b/apply.c
index 3cf544e9a9..5e54453f79 100644
--- a/apply.c
+++ b/apply.c
@@ -3321,7 +3321,7 @@ static int apply_binary(struct apply_state *state,
 	if (odb_has_object(the_repository->objects, &oid, 0)) {
 		/* We already have the postimage */
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *result;
 
 		result = odb_read_object(the_repository->objects, &oid,
@@ -3384,7 +3384,7 @@ static int read_blob_object(struct strbuf *buf, const struct object_id *oid, uns
 		strbuf_addf(buf, "Subproject commit %s\n", oid_to_hex(oid));
 	} else {
 		enum object_type type;
-		unsigned long sz;
+		size_t sz;
 		char *result;
 
 		result = odb_read_object(the_repository->objects, oid,
@@ -3611,7 +3611,7 @@ static int load_preimage(struct apply_state *state,
 
 static int resolve_to(struct image *image, const struct object_id *result_id)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *data;
 
diff --git a/archive.c b/archive.c
index 51229107a5..59790be986 100644
--- a/archive.c
+++ b/archive.c
@@ -87,7 +87,7 @@ static void *object_file_to_archive(const struct archiver_args *args,
 				    const struct object_id *oid,
 				    unsigned int mode,
 				    enum object_type *type,
-				    unsigned long *sizep)
+				    size_t *sizep)
 {
 	void *buffer;
 	const struct commit *commit = args->convert ? args->commit : NULL;
@@ -158,7 +158,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	write_archive_entry_fn_t write_entry = c->write_entry;
 	int err;
 	const char *path_without_prefix;
-	unsigned long size;
+	size_t size;
 	void *buffer;
 	enum object_type type;
 
diff --git a/attr.c b/attr.c
index 75369547b3..c61472a4e6 100644
--- a/attr.c
+++ b/attr.c
@@ -768,7 +768,7 @@ static struct attr_stack *read_attr_from_blob(struct index_state *istate,
 					      const char *path, unsigned flags)
 {
 	struct object_id oid;
-	unsigned long sz;
+	size_t sz;
 	enum object_type type;
 	void *buf;
 	unsigned short mode;
diff --git a/bisect.c b/bisect.c
index e29d1cbc64..94c7028d2a 100644
--- a/bisect.c
+++ b/bisect.c
@@ -154,7 +154,7 @@ static void show_list(const char *debug, int counted, int nr,
 		struct commit *commit = p->item;
 		unsigned commit_flags = commit->object.flags;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object(the_repository->objects,
 					    &commit->object.oid, &type,
 					    &size);
diff --git a/blame.c b/blame.c
index 977cbb7097..126e232416 100644
--- a/blame.c
+++ b/blame.c
@@ -1041,10 +1041,13 @@ static void fill_origin_blob(struct diff_options *opt,
 		    textconv_object(opt->repo, o->path, o->mode,
 				    &o->blob_oid, 1, &file->ptr, &file_size))
 			;
-		else
+		else {
+			size_t file_size_st = 0;
 			file->ptr = odb_read_object(the_repository->objects,
 						    &o->blob_oid, &type,
-						    &file_size);
+						    &file_size_st);
+			file_size = cast_size_t_to_ulong(file_size_st);
+		}
 		file->size = file_size;
 
 		if (!file->ptr)
@@ -2869,10 +2872,14 @@ void setup_scoreboard(struct blame_scoreboard *sb,
 		    textconv_object(sb->repo, sb->path, o->mode, &o->blob_oid, 1, (char **) &sb->final_buf,
 				    &sb->final_buf_size))
 			;
-		else
+		else {
+			size_t final_buf_size_st = 0;
 			sb->final_buf = odb_read_object(the_repository->objects,
 							&o->blob_oid, &type,
-							&sb->final_buf_size);
+							&final_buf_size_st);
+			sb->final_buf_size =
+				cast_size_t_to_ulong(final_buf_size_st);
+		}
 
 		if (!sb->final_buf)
 			die(_("cannot read blob %s for path %s"),
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 2b64f8f733..adb2ef5130 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -84,7 +84,7 @@ static char *replace_idents_using_mailmap(char *object_buf, size_t *size)
 
 static int filter_object(const char *path, unsigned mode,
 			 const struct object_id *oid,
-			 char **buf, unsigned long *size)
+			 char **buf, size_t *size)
 {
 	enum object_type type;
 
@@ -120,7 +120,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 	struct object_id oid;
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	struct object_context obj_context = {0};
 	struct object_info oi = OBJECT_INFO_INIT;
 	unsigned flags = OBJECT_INFO_LOOKUP_REPLACE;
@@ -163,11 +163,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (odb_read_object_info_extended(the_repository->objects, &oid, &oi, flags) < 0)
 			die("git cat-file: could not get object info");
 
-		if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) {
-			size_t s = size;
-			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
-		}
+		if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG))
+			buf = replace_idents_using_mailmap(buf, &size);
 
 		printf("%"PRIuMAX"\n", (uintmax_t)size);
 		ret = 0;
@@ -188,9 +185,15 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		break;
 
 	case 'c':
-		if (textconv_object(the_repository, path, obj_context.mode,
-				    &oid, 1, &buf, &size))
+	{
+		unsigned long size_ul = 0;
+		int textconv_ret = textconv_object(the_repository, path,
+						   obj_context.mode, &oid, 1,
+						   &buf, &size_ul);
+		size = size_ul;
+		if (textconv_ret)
 			break;
+	}
 		/* else fallthrough */
 
 	case 'p':
@@ -216,11 +219,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		if (!buf)
 			die("Cannot read object %s", obj_name);
 
-		if (use_mailmap) {
-			size_t s = size;
-			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
-		}
+		if (use_mailmap)
+			buf = replace_idents_using_mailmap(buf, &size);
 
 		/* otherwise just spit out the data */
 		break;
@@ -263,11 +263,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 		buf = odb_read_object_peeled(the_repository->objects, &oid,
 					     exp_type_id, &size, NULL);
 
-		if (use_mailmap) {
-			size_t s = size;
-			buf = replace_idents_using_mailmap(buf, &s);
-			size = cast_size_t_to_ulong(s);
-		}
+		if (use_mailmap)
+			buf = replace_idents_using_mailmap(buf, &size);
 		break;
 	}
 	default:
@@ -288,7 +285,7 @@ cleanup:
 struct expand_data {
 	struct object_id oid;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	unsigned short mode;
 	off_t disk_size;
 	const char *rest;
@@ -404,7 +401,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 			fflush(stdout);
 		if (opt->transform_mode) {
 			char *contents;
-			unsigned long size;
+			size_t size;
 
 			if (!data->rest)
 				die("missing path for '%s'", oid_to_hex(oid));
@@ -416,9 +413,12 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 					    oid_to_hex(oid), data->rest);
 			} else if (opt->transform_mode == 'c') {
 				enum object_type type;
-				if (!textconv_object(the_repository,
-						     data->rest, 0100644, oid,
-						     1, &contents, &size))
+				unsigned long size_ul = 0;
+				if (textconv_object(the_repository,
+						    data->rest, 0100644, oid,
+						    1, &contents, &size_ul))
+					size = size_ul;
+				else
 					contents = odb_read_object(the_repository->objects,
 								   oid, &type, &size);
 				if (!contents)
@@ -434,7 +434,7 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 	}
 	else {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		void *contents;
 
 		contents = odb_read_object(the_repository->objects, oid,
@@ -442,11 +442,8 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 		if (!contents)
 			die("object %s disappeared", oid_to_hex(oid));
 
-		if (use_mailmap) {
-			size_t s = size;
-			contents = replace_idents_using_mailmap(contents, &s);
-			size = cast_size_t_to_ulong(s);
-		}
+		if (use_mailmap)
+			contents = replace_idents_using_mailmap(contents, &size);
 
 		if (type != data->type)
 			die("object %s changed type!?", oid_to_hex(oid));
@@ -546,15 +543,13 @@ static void batch_object_write(const char *obj_name,
 		}
 
 		if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) {
-			size_t s = data->size;
 			char *buf = NULL;
 
 			buf = odb_read_object(the_repository->objects, &data->oid,
 					      &data->type, &data->size);
 			if (!buf)
 				die(_("unable to read %s"), oid_to_hex(&data->oid));
-			buf = replace_idents_using_mailmap(buf, &s);
-			data->size = cast_size_t_to_ulong(s);
+			buf = replace_idents_using_mailmap(buf, &data->size);
 
 			free(buf);
 		}
diff --git a/builtin/difftool.c b/builtin/difftool.c
index 2a21005f2e..26778f8515 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -319,7 +319,7 @@ static char *get_symlink(struct repository *repo,
 		data = strbuf_detach(&link, NULL);
 	} else {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		data = odb_read_object(repo->objects, oid, &type, &size);
 		if (!data)
 			die(_("could not read object %s for symlink %s"),
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 2eb43a28da..0be43104dc 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -317,7 +317,10 @@ static void export_blob(const struct object_id *oid)
 		object = (struct object *)lookup_blob(the_repository, oid);
 		eaten = 0;
 	} else {
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 		if (!buf)
 			die(_("could not read blob %s"), oid_to_hex(oid));
 		if (check_object_signature(the_repository, oid, buf, size,
@@ -880,7 +883,7 @@ static char *anonymize_tag(void)
 
 static void handle_tag(const char *name, struct tag *tag)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf;
 	const char *tagger, *tagger_end, *message;
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3dff898c43..d11a2cc2c1 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1291,7 +1291,10 @@ static void load_tree(struct tree_entry *root)
 			die(_("can't load tree %s"), oid_to_hex(oid));
 	} else {
 		enum object_type type;
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 		if (!buf || type != OBJ_TREE)
 			die(_("can't load tree %s"), oid_to_hex(oid));
 	}
@@ -2560,7 +2563,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 			die(_("mark :%" PRIuMAX " not a commit"), commit_mark);
 		oidcpy(&commit_oid, &commit_oe->idx.oid);
 	} else if (!repo_get_oid(the_repository, p, &commit_oid)) {
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object_peeled(the_repository->objects,
 						   &commit_oid, OBJ_COMMIT, &size,
 						   &commit_oid);
@@ -2627,10 +2630,12 @@ static void parse_from_existing(struct branch *b)
 		oidclr(&b->branch_tree.versions[1].oid, the_repository->hash_algo);
 	} else {
 		unsigned long size;
+		size_t size_st = 0;
 		char *buf;
 
 		buf = odb_read_object_peeled(the_repository->objects, &b->oid,
-					     OBJ_COMMIT, &size, &b->oid);
+					     OBJ_COMMIT, &size_st, &b->oid);
+		size = cast_size_t_to_ulong(size_st);
 		parse_from_commit(b, buf, size);
 		free(buf);
 	}
@@ -2722,7 +2727,7 @@ static struct hash_list *parse_merge(unsigned int *count)
 				die(_("mark :%" PRIuMAX " not a commit"), idnum);
 			oidcpy(&n->oid, &oe->idx.oid);
 		} else if (!repo_get_oid(the_repository, from, &n->oid)) {
-			unsigned long size;
+			size_t size;
 			char *buf = odb_read_object_peeled(the_repository->objects,
 							   &n->oid, OBJ_COMMIT,
 							   &size, &n->oid);
@@ -3330,7 +3335,10 @@ static void cat_blob(struct object_entry *oe, struct object_id *oid)
 	char *buf;
 
 	if (!oe || oe->pack_id == MAX_PACK_ID) {
-		buf = odb_read_object(the_repository->objects, oid, &type, &size);
+		size_t size_st = 0;
+		buf = odb_read_object(the_repository->objects, oid, &type,
+				      &size_st);
+		size = cast_size_t_to_ulong(size_st);
 	} else {
 		type = oe->type;
 		buf = gfi_unpack_entry(oe, &size);
@@ -3438,8 +3446,10 @@ static struct object_entry *dereference(struct object_entry *oe,
 		buf = gfi_unpack_entry(oe, &size);
 	} else {
 		enum object_type unused;
+		size_t size_st = 0;
 		buf = odb_read_object(the_repository->objects, oid,
-				      &unused, &size);
+				      &unused, &size_st);
+		size = cast_size_t_to_ulong(size_st);
 	}
 	if (!buf)
 		die(_("can't load object %s"), oid_to_hex(oid));
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 248f8ff5a0..76b723f36d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -724,7 +724,7 @@ static int fsck_loose(const struct object_id *oid, const char *path,
 	struct for_each_loose_cb *data = cb_data;
 	struct object *obj;
 	enum object_type type = OBJ_NONE;
-	unsigned long size;
+	size_t size;
 	void *contents = NULL;
 	int eaten;
 	struct object_info oi = OBJECT_INFO_INIT;
diff --git a/builtin/grep.c b/builtin/grep.c
index 6a09571903..26b85479ca 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -520,7 +520,7 @@ static int grep_submodule(struct grep_opt *opt,
 		enum object_type object_type;
 		struct tree_desc tree;
 		void *data;
-		unsigned long size;
+		size_t size;
 		struct strbuf base = STRBUF_INIT;
 
 		obj_read_lock();
@@ -573,7 +573,7 @@ static int grep_cache(struct grep_opt *opt,
 			enum object_type type;
 			struct tree_desc tree;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(the_repository->objects, &ce->oid,
 					       &type, &size);
@@ -666,7 +666,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 			enum object_type type;
 			struct tree_desc sub;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(the_repository->objects,
 					       &entry.oid, &type, &size);
@@ -730,7 +730,7 @@ static void collect_blob_oids_for_tree(struct repository *repo,
 			enum object_type type;
 			struct tree_desc sub_tree;
 			void *data;
-			unsigned long size;
+			size_t size;
 
 			data = odb_read_object(repo->objects, &entry.oid,
 					       &type, &size);
@@ -764,7 +764,7 @@ static void collect_blob_oids_for_treeish(struct grep_opt *opt,
 {
 	struct tree_desc tree;
 	void *data;
-	unsigned long size;
+	size_t size;
 	struct strbuf base = STRBUF_INIT;
 	int len;
 
@@ -841,7 +841,7 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
 	if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
 		struct tree_desc tree;
 		void *data;
-		unsigned long size;
+		size_t size;
 		struct strbuf base;
 		int hit, len;
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 3c4474e681..78da3a6566 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -258,7 +258,7 @@ static unsigned check_object(struct object *obj)
 		return 0;
 
 	if (!(obj->flags & FLAG_CHECKED)) {
-		unsigned long size;
+		size_t size;
 		int type = odb_read_object_info(the_repository->objects,
 						&obj->oid, &size);
 		if (type <= 0)
@@ -905,7 +905,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
 	if (collision_test_needed) {
 		void *has_data;
 		enum object_type has_type;
-		unsigned long has_size;
+		size_t has_size;
 		read_lock();
 		has_type = odb_read_object_info(the_repository->objects, oid, &has_size);
 		if (has_type < 0)
@@ -1515,7 +1515,7 @@ static void fix_unresolved_deltas(struct hashfile *f)
 		struct ref_delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
 		void *data;
-		unsigned long size;
+		size_t size;
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
diff --git a/builtin/log.c b/builtin/log.c
index e464b30af4..d027ce1e0b 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -613,7 +613,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 
 static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = odb_read_object(the_repository->objects, oid, &type, &size);
 	unsigned long offset = 0;
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 12d5d828ff..f30507215a 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -256,7 +256,7 @@ static void expand_objectsize(struct repository *repo, struct strbuf *line,
 	size_t len;
 
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(repo->objects, oid, &size) < 0)
 			die(_("could not get object info about '%s'"),
 			    oid_to_hex(oid));
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 57846911ce..46edaffc2e 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -32,7 +32,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
 	size_t len;
 
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(the_repository->objects, oid, &size) < 0)
 			die(_("could not get object info about '%s'"),
 			    oid_to_hex(oid));
@@ -220,7 +220,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 		return early;
 
 	if (type == OBJ_BLOB) {
-		unsigned long size;
+		size_t size;
 		if (odb_read_object_info(the_repository->objects, oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index 312b595d1e..49f41e520f 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -69,7 +69,7 @@ static const char *explanation(struct merge_list *entry)
 	return "removed in remote";
 }
 
-static void *result(struct merge_list *entry, unsigned long *size)
+static void *result(struct merge_list *entry, size_t *size)
 {
 	enum object_type type;
 	struct blob *base, *our, *their;
@@ -96,7 +96,7 @@ static void *result(struct merge_list *entry, unsigned long *size)
 			   base, our, their, size);
 }
 
-static void *origin(struct merge_list *entry, unsigned long *size)
+static void *origin(struct merge_list *entry, size_t *size)
 {
 	enum object_type type;
 	while (entry) {
@@ -119,7 +119,7 @@ static int show_outf(void *priv UNUSED, mmbuffer_t *mb, int nbuf)
 
 static void show_diff(struct merge_list *entry)
 {
-	unsigned long size;
+	size_t size;
 	mmfile_t src, dst;
 	xpparam_t xpp;
 	xdemitconf_t xecfg;
diff --git a/builtin/mktag.c b/builtin/mktag.c
index f40264a878..37c17e6beb 100644
--- a/builtin/mktag.c
+++ b/builtin/mktag.c
@@ -50,7 +50,7 @@ static int verify_object_in_tag(struct object_id *tagged_oid, int *tagged_type)
 {
 	int ret;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	void *buffer;
 	const struct object_id *repl;
 
diff --git a/builtin/notes.c b/builtin/notes.c
index 9af602bdd7..962df867c8 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -150,7 +150,7 @@ static int list_each_note(const struct object_id *object_oid,
 
 static void copy_obj_to_fd(int fd, const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = odb_read_object(the_repository->objects, oid, &type, &size);
 	if (buf) {
@@ -313,7 +313,7 @@ static int parse_reuse_arg(const struct option *opt, const char *arg, int unset)
 	char *value;
 	struct object_id object;
 	enum object_type type;
-	unsigned long len;
+	size_t len;
 
 	BUG_ON_OPT_NEG(unset);
 
@@ -721,7 +721,7 @@ static int append_edit(int argc, const char **argv, const char *prefix,
 
 	if (note && !edit) {
 		/* Append buf to previous note contents */
-		unsigned long size;
+		size_t size;
 		enum object_type type;
 		struct strbuf buf = STRBUF_INIT;
 		char *prev_buf = odb_read_object(the_repository->objects, note, &type, &size);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 961d547ef2..b5092d97ee 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -356,14 +356,17 @@ static void *get_delta(struct object_entry *entry)
 	unsigned long size, base_size, delta_size;
 	void *buf, *base_buf, *delta_buf;
 	enum object_type type;
+	size_t size_st = 0, base_size_st = 0;
 
 	buf = odb_read_object(the_repository->objects, &entry->idx.oid,
-			      &type, &size);
+			      &type, &size_st);
+	size = cast_size_t_to_ulong(size_st);
 	if (!buf)
 		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
 	base_buf = odb_read_object(the_repository->objects,
 				   &DELTA(entry)->idx.oid, &type,
-				   &base_size);
+				   &base_size_st);
+	base_size = cast_size_t_to_ulong(base_size_st);
 	if (!base_buf)
 		die("unable to read %s",
 		    oid_to_hex(&DELTA(entry)->idx.oid));
@@ -528,9 +531,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 			type = st->type;
 			size = st->size;
 		} else {
+			size_t size_st = 0;
 			buf = odb_read_object(the_repository->objects,
 					      &entry->idx.oid, &type,
-					      &size);
+					      &size_st);
+			size = cast_size_t_to_ulong(size_st);
 			if (!buf)
 				die(_("unable to read %s"),
 				    oid_to_hex(&entry->idx.oid));
@@ -1937,6 +1942,7 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	struct pbase_tree_cache *ent, *nent;
 	void *data;
 	unsigned long size;
+	size_t size_st = 0;
 	enum object_type type;
 	int neigh;
 	int my_ix = pbase_tree_cache_ix(oid);
@@ -1964,7 +1970,8 @@ static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
 	/* Did not find one.  Either we got a bogus request or
 	 * we need to read and perhaps cache.
 	 */
-	data = odb_read_object(the_repository->objects, oid, &type, &size);
+	data = odb_read_object(the_repository->objects, oid, &type, &size_st);
+	size = cast_size_t_to_ulong(size_st);
 	if (!data)
 		return NULL;
 	if (type != OBJ_TREE) {
@@ -2119,13 +2126,15 @@ static void add_preferred_base(struct object_id *oid)
 	struct pbase_tree *it;
 	void *data;
 	unsigned long size;
+	size_t size_st = 0;
 	struct object_id tree_oid;
 
 	if (window <= num_preferred_base++)
 		return;
 
 	data = odb_read_object_peeled(the_repository->objects, oid,
-				      OBJ_TREE, &size, &tree_oid);
+				      OBJ_TREE, &size_st, &tree_oid);
+	size = cast_size_t_to_ulong(size_st);
 	if (!data)
 		return;
 
@@ -2237,7 +2246,7 @@ static void prefetch_to_pack(uint32_t object_index_start) {
 
 static void check_object(struct object_entry *entry, uint32_t object_index)
 {
-	unsigned long canonical_size;
+	size_t canonical_size;
 	enum object_type type;
 	struct object_info oi = {.typep = &type, .sizep = &canonical_size};
 
@@ -2436,7 +2445,7 @@ static void drop_reused_delta(struct object_entry *entry)
 	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
 	struct object_info oi = OBJECT_INFO_INIT;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 
 	while (*idx) {
 		struct object_entry *oe = &to_pack.objects[*idx - 1];
@@ -2748,7 +2757,7 @@ size_t oe_get_size_slow(struct packing_data *pack,
 	size_t size;
 
 	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
-		unsigned long sz;
+		size_t sz;
 		packing_data_lock(&to_pack);
 		if (odb_read_object_info(the_repository->objects,
 					 &e->idx.oid, &sz) < 0)
@@ -2833,10 +2842,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 
 	/* Load data if not already done */
 	if (!trg->data) {
+		size_t sz_st = 0;
 		packing_data_lock(&to_pack);
 		trg->data = odb_read_object(the_repository->objects,
 					    &trg_entry->idx.oid, &type,
-					    &sz);
+					    &sz_st);
+		sz = cast_size_t_to_ulong(sz_st);
 		packing_data_unlock(&to_pack);
 		if (!trg->data)
 			die(_("object %s cannot be read"),
@@ -2848,10 +2859,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		*mem_usage += sz;
 	}
 	if (!src->data) {
+		size_t sz_st = 0;
 		packing_data_lock(&to_pack);
 		src->data = odb_read_object(the_repository->objects,
 					    &src_entry->idx.oid, &type,
-					    &sz);
+					    &sz_st);
+		sz = cast_size_t_to_ulong(sz_st);
 		packing_data_unlock(&to_pack);
 		if (!src->data) {
 			if (src_entry->preferred_base) {
diff --git a/builtin/repo.c b/builtin/repo.c
index 71a5c1c29c..69f3626467 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -784,13 +784,14 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
 	for (size_t i = 0; i < oids->nr; i++) {
 		struct object_info oi = OBJECT_INFO_INIT;
 		unsigned long inflated;
+		size_t inflated_st = 0;
 		struct commit *commit;
 		struct object *obj;
 		void *content;
 		off_t disk;
 		int eaten;
 
-		oi.sizep = &inflated;
+		oi.sizep = &inflated_st;
 		oi.disk_sizep = &disk;
 		oi.contentp = &content;
 
@@ -798,6 +799,7 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
 						  OBJECT_INFO_SKIP_FETCH_OBJECT |
 						  OBJECT_INFO_QUICK) < 0)
 			continue;
+		inflated = cast_size_t_to_ulong(inflated_st);
 
 		obj = parse_object_buffer(the_repository, &oids->oid[i], type,
 					  inflated, content, &eaten);
diff --git a/builtin/tag.c b/builtin/tag.c
index d51c2e3349..06c125b53c 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -238,7 +238,7 @@ static int git_tag_config(const char *var, const char *value,
 
 static void write_tag_body(int fd, const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf, *sp, *orig;
 	struct strbuf payload = STRBUF_INIT;
@@ -388,7 +388,7 @@ static void create_reflog_msg(const struct object_id *oid, struct strbuf *sb)
 	enum object_type type;
 	struct commit *c;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int subject_len = 0;
 	const char *subject_start;
 
diff --git a/builtin/unpack-file.c b/builtin/unpack-file.c
index 87877a9fab..387389ed49 100644
--- a/builtin/unpack-file.c
+++ b/builtin/unpack-file.c
@@ -12,7 +12,7 @@ static char *create_temp_file(struct object_id *oid)
 	static char path[50];
 	void *buf;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	int fd;
 
 	buf = odb_read_object(the_repository->objects, oid, &type, &size);
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index e7a50c493c..f3849bb654 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -231,7 +231,7 @@ static int check_object(struct object *obj, enum object_type type,
 		die("object type mismatch");
 
 	if (!(obj->flags & FLAG_OPEN)) {
-		unsigned long size;
+		size_t size;
 		int type = odb_read_object_info(the_repository->objects, &obj->oid, &size);
 		if (type != obj->type || type <= 0)
 			die("object of unexpected type");
@@ -436,6 +436,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 {
 	void *delta_data, *base;
 	unsigned long base_size;
+	size_t base_size_st = 0;
 	struct object_id base_oid;
 
 	if (type == OBJ_REF_DELTA) {
@@ -512,7 +513,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		return;
 
 	base = odb_read_object(the_repository->objects, &base_oid,
-			       &type, &base_size);
+			       &type, &base_size_st);
+	base_size = cast_size_t_to_ulong(base_size_st);
 	if (!base) {
 		error("failed to read delta-pack base object %s",
 		      oid_to_hex(&base_oid));
diff --git a/bundle.c b/bundle.c
index 42327f9739..fd2db2c837 100644
--- a/bundle.c
+++ b/bundle.c
@@ -296,7 +296,7 @@ int list_bundle_refs(struct bundle_header *header, int argc, const char **argv)
 
 static int is_tag_in_date_range(struct object *tag, struct rev_info *revs)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	char *buf = NULL, *line, *lineend;
 	timestamp_t date;
diff --git a/combine-diff.c b/combine-diff.c
index b799862068..3ce71db8bb 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -325,7 +325,9 @@ static char *grab_blob(struct repository *r,
 		*size = fill_textconv(r, textconv, df, &blob);
 		free_filespec(df);
 	} else {
-		blob = odb_read_object(r->objects, oid, &type, size);
+		size_t size_st = 0;
+		blob = odb_read_object(r->objects, oid, &type, &size_st);
+		*size = cast_size_t_to_ulong(size_st);
 		if (!blob)
 			die(_("unable to read %s"), oid_to_hex(oid));
 		if (type != OBJ_BLOB)
diff --git a/commit.c b/commit.c
index fd8723502e..7950effc58 100644
--- a/commit.c
+++ b/commit.c
@@ -395,7 +395,7 @@ const void *repo_get_commit_buffer(struct repository *r,
 	const void *ret = get_cached_commit_buffer(r, commit, sizep);
 	if (!ret) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		ret = odb_read_object(r->objects, &commit->object.oid, &type, &size);
 		if (!ret)
 			die("cannot read commit object %s",
@@ -404,7 +404,7 @@ const void *repo_get_commit_buffer(struct repository *r,
 			die("expected commit for %s, got %s",
 			    oid_to_hex(&commit->object.oid), type_name(type));
 		if (sizep)
-			*sizep = size;
+			*sizep = cast_size_t_to_ulong(size);
 	}
 	return ret;
 }
@@ -437,7 +437,7 @@ static inline void set_commit_tree(struct commit *c, struct tree *t)
 static void load_tree_from_commit_contents(struct repository *r, struct commit *commit)
 {
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	char *buf;
 	const char *p;
 	struct object_id tree_oid;
@@ -604,7 +604,7 @@ int repo_parse_commit_internal(struct repository *r,
 {
 	enum object_type type;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 	struct object_info oi = {
 		.typep = &type,
 		.sizep = &size,
@@ -1313,7 +1313,7 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h
 	struct merge_remote_desc *desc;
 	struct commit_extra_header *mergetag;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	struct strbuf payload = STRBUF_INIT;
 	struct strbuf signature = STRBUF_INIT;
diff --git a/config.c b/config.c
index a1b92fe083..21b231052c 100644
--- a/config.c
+++ b/config.c
@@ -1442,7 +1442,7 @@ int git_config_from_blob_oid(config_fn_t fn,
 {
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	buf = odb_read_object(repo->objects, oid, &type, &size);
diff --git a/diff.c b/diff.c
index 5a584fa1d5..816b89dc6c 100644
--- a/diff.c
+++ b/diff.c
@@ -4594,8 +4594,9 @@ int diff_populate_filespec(struct repository *r,
 		}
 	}
 	else {
+		size_t size_st = 0;
 		struct object_info info = {
-			.sizep = &s->size
+			.sizep = &size_st
 		};
 
 		if (!(size_only || check_binary))
@@ -4617,6 +4618,7 @@ int diff_populate_filespec(struct repository *r,
 			die("unable to read %s", oid_to_hex(&s->oid));
 
 object_read:
+		s->size = cast_size_t_to_ulong(size_st);
 		if (size_only || check_binary) {
 			if (size_only)
 				return 0;
@@ -4631,6 +4633,7 @@ object_read:
 			if (odb_read_object_info_extended(r->objects, &s->oid, &info,
 							  OBJECT_INFO_LOOKUP_REPLACE))
 				die("unable to read %s", oid_to_hex(&s->oid));
+			s->size = cast_size_t_to_ulong(size_st);
 		}
 		s->should_free = 1;
 	}
diff --git a/dir.c b/dir.c
index 33c81c256e..b6764d98a7 100644
--- a/dir.c
+++ b/dir.c
@@ -324,7 +324,7 @@ static int do_read_blob(const struct object_id *oid, struct oid_stat *oid_stat,
 			size_t *size_out, char **data_out)
 {
 	enum object_type type;
-	unsigned long sz;
+	size_t sz;
 	char *data;
 
 	*size_out = 0;
diff --git a/entry.c b/entry.c
index 7817aee362..c444fe5a10 100644
--- a/entry.c
+++ b/entry.c
@@ -92,11 +92,9 @@ static int create_file(const char *path, unsigned int mode)
 void *read_blob_entry(const struct cache_entry *ce, size_t *size)
 {
 	enum object_type type;
-	unsigned long ul;
 	void *blob_data = odb_read_object(the_repository->objects, &ce->oid,
-					  &type, &ul);
+					  &type, size);
 
-	*size = ul;
 	if (blob_data) {
 		if (type == OBJ_BLOB)
 			return blob_data;
diff --git a/fmt-merge-msg.c b/fmt-merge-msg.c
index 45d8b20e97..14441f23ae 100644
--- a/fmt-merge-msg.c
+++ b/fmt-merge-msg.c
@@ -528,11 +528,11 @@ static void fmt_merge_msg_sigs(struct strbuf *out)
 	for (i = 0; i < origins.nr; i++) {
 		struct object_id *oid = origins.items[i].util;
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf = odb_read_object(the_repository->objects, oid,
 					    &type, &size);
 		char *origbuf = buf;
-		unsigned long len = size;
+		size_t len = size;
 		struct signature_check sigc = { NULL };
 		struct strbuf payload = STRBUF_INIT, sig = STRBUF_INIT;
 
diff --git a/fsck.c b/fsck.c
index b4ffee6a04..94c8651c7d 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1328,7 +1328,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
 	oidset_iter_init(blobs_found, &iter);
 	while ((oid = oidset_iter_next(&iter))) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		char *buf;
 
 		if (oidset_contains(blobs_done, oid))
diff --git a/grep.c b/grep.c
index a54e5d86a9..1d75d31421 100644
--- a/grep.c
+++ b/grep.c
@@ -1931,9 +1931,11 @@ void grep_source_clear_data(struct grep_source *gs)
 static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
+	size_t size_st = 0;
 
 	gs->buf = odb_read_object(gs->repo->objects, gs->identifier,
-				  &type, &gs->size);
+				  &type, &size_st);
+	gs->size = cast_size_t_to_ulong(size_st);
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,
diff --git a/http-push.c b/http-push.c
index 520d6c3b6a..c61d9f7e02 100644
--- a/http-push.c
+++ b/http-push.c
@@ -365,7 +365,7 @@ static void start_put(struct transfer_request *request)
 	enum object_type type;
 	char hdr[50];
 	void *unpacked;
-	unsigned long len;
+	size_t len;
 	int hdrlen;
 	ssize_t size;
 	git_zstream stream;
diff --git a/list-objects-filter.c b/list-objects-filter.c
index 78316e7f90..c912ff3079 100644
--- a/list-objects-filter.c
+++ b/list-objects-filter.c
@@ -280,7 +280,7 @@ static enum list_objects_filter_result filter_blobs_limit(
 	void *filter_data_)
 {
 	struct filter_blobs_limit_data *filter_data = filter_data_;
-	unsigned long object_length;
+	size_t object_length;
 	enum object_type t;
 
 	switch (filter_situation) {
diff --git a/mailmap.c b/mailmap.c
index 3b2691781d..72b639e602 100644
--- a/mailmap.c
+++ b/mailmap.c
@@ -186,7 +186,7 @@ int read_mailmap_blob(struct repository *repo, struct string_list *map,
 {
 	struct object_id oid;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	if (!name)
diff --git a/match-trees.c b/match-trees.c
index 4216933d06..2a43c0fa1a 100644
--- a/match-trees.c
+++ b/match-trees.c
@@ -61,7 +61,7 @@ static void *fill_tree_desc_strict(struct repository *r,
 {
 	void *buffer;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 
 	buffer = odb_read_object(r->objects, hash, &type, &size);
 	if (!buffer)
@@ -186,7 +186,7 @@ static int splice_tree(struct repository *r,
 	char *subpath;
 	int toplen;
 	char *buf;
-	unsigned long sz;
+	size_t sz;
 	struct tree_desc desc;
 	unsigned char *rewrite_here;
 	const struct object_id *rewrite_with;
diff --git a/merge-blobs.c b/merge-blobs.c
index 6fc2799417..16a75bd1e3 100644
--- a/merge-blobs.c
+++ b/merge-blobs.c
@@ -9,7 +9,7 @@
 static int fill_mmfile_blob(mmfile_t *f, struct blob *obj)
 {
 	void *buf;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	buf = odb_read_object(the_repository->objects, &obj->object.oid,
@@ -35,7 +35,7 @@ static void *three_way_filemerge(struct index_state *istate,
 				 mmfile_t *base,
 				 mmfile_t *our,
 				 mmfile_t *their,
-				 unsigned long *size)
+				 size_t *size)
 {
 	enum ll_merge_result merge_status;
 	mmbuffer_t res;
@@ -61,7 +61,7 @@ static void *three_way_filemerge(struct index_state *istate,
 
 void *merge_blobs(struct index_state *istate, const char *path,
 		  struct blob *base, struct blob *our,
-		  struct blob *their, unsigned long *size)
+		  struct blob *their, size_t *size)
 {
 	void *res = NULL;
 	mmfile_t f1, f2, common;
diff --git a/merge-blobs.h b/merge-blobs.h
index 13cf9669e5..5797517a06 100644
--- a/merge-blobs.h
+++ b/merge-blobs.h
@@ -6,6 +6,6 @@ struct index_state;
 
 void *merge_blobs(struct index_state *, const char *,
 		  struct blob *, struct blob *,
-		  struct blob *, unsigned long *);
+		  struct blob *, size_t *);
 
 #endif /* MERGE_BLOBS_H */
diff --git a/merge-ort.c b/merge-ort.c
index 544be9e466..4f6273bd51 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -3716,7 +3716,7 @@ static int read_oid_strbuf(struct merge_options *opt,
 {
 	void *buf;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	buf = odb_read_object(opt->repo->objects, oid, &type, &size);
 	if (!buf) {
 		path_msg(opt, ERROR_OBJECT_READ_FAILED, 0,
diff --git a/notes-cache.c b/notes-cache.c
index bf5bb1f6c1..74cef802bd 100644
--- a/notes-cache.c
+++ b/notes-cache.c
@@ -82,7 +82,7 @@ char *notes_cache_get(struct notes_cache *c, struct object_id *key_oid,
 	const struct object_id *value_oid;
 	enum object_type type;
 	char *value;
-	unsigned long size;
+	size_t size;
 
 	value_oid = get_note(&c->tree, key_oid);
 	if (!value_oid)
diff --git a/notes-merge.c b/notes-merge.c
index b9322abbcb..118cad2518 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -339,7 +339,7 @@ static void write_note_to_worktree(const struct object_id *obj,
 				   const struct object_id *note)
 {
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	void *buf = odb_read_object(the_repository->objects, note, &type, &size);
 
 	if (!buf)
diff --git a/notes.c b/notes.c
index 8f315e2a00..ec9c2cb150 100644
--- a/notes.c
+++ b/notes.c
@@ -811,7 +811,8 @@ int combine_notes_concatenate(struct object_id *cur_oid,
 			      const struct object_id *new_oid)
 {
 	char *cur_msg = NULL, *new_msg = NULL, *buf;
-	unsigned long cur_len, new_len, buf_len;
+	unsigned long buf_len;
+	size_t cur_len, new_len;
 	enum object_type cur_type, new_type;
 	int ret;
 
@@ -875,7 +876,7 @@ static int string_list_add_note_lines(struct string_list *list,
 				      const struct object_id *oid)
 {
 	char *data;
-	unsigned long len;
+	size_t len;
 	enum object_type t;
 
 	if (is_null_oid(oid))
@@ -1282,7 +1283,8 @@ static void format_note(struct notes_tree *t, const struct object_id *object_oid
 	static const char utf8[] = "utf-8";
 	const struct object_id *oid;
 	char *msg, *msg_p;
-	unsigned long linelen, msglen;
+	unsigned long linelen;
+	size_t msglen;
 	enum object_type type;
 
 	if (!t)
diff --git a/object-file.c b/object-file.c
index bce941874e..3a21c14027 100644
--- a/object-file.c
+++ b/object-file.c
@@ -300,7 +300,7 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
 	}
 
 	if (oi->sizep)
-		*oi->sizep = cast_size_t_to_ulong(size);
+		*oi->sizep = size;
 
 	/*
 	 * The length must be followed by a zero byte
@@ -931,7 +931,7 @@ int force_object_loose(struct odb_source *source,
 	struct odb_source_files *files = odb_source_files_downcast(source);
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	void *buf;
-	unsigned long len;
+	size_t len;
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct object_id compat_oid;
 	enum object_type type;
@@ -1614,7 +1614,7 @@ int read_loose_object(struct repository *repo,
 	unsigned long mapsize;
 	git_zstream stream;
 	char hdr[MAX_HEADER_LEN];
-	unsigned long *size = oi->sizep;
+	size_t *size = oi->sizep;
 
 	fd = git_open(path);
 	if (fd >= 0)
diff --git a/object.c b/object.c
index 465902ecc6..23b84aa7e2 100644
--- a/object.c
+++ b/object.c
@@ -325,7 +325,7 @@ struct object *parse_object_with_flags(struct repository *r,
 {
 	int skip_hash = !!(flags & PARSE_OBJECT_SKIP_HASH_CHECK);
 	int discard_tree = !!(flags & PARSE_OBJECT_DISCARD_TREE);
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	int eaten;
 	const struct object_id *repl = lookup_replace_object(r, oid);
diff --git a/odb.c b/odb.c
index 965ef68e4e..7d555be09f 100644
--- a/odb.c
+++ b/odb.c
@@ -625,7 +625,7 @@ static int oid_object_info_convert(struct repository *r,
 	enum object_type type;
 	struct object_id oid, delta_base_oid;
 	struct object_info new_oi, *oi;
-	unsigned long size;
+	size_t size;
 	void *content;
 	int ret;
 
@@ -716,7 +716,7 @@ int odb_read_object_info_extended(struct object_database *odb,
 /* returns enum object_type or negative */
 int odb_read_object_info(struct object_database *odb,
 			 const struct object_id *oid,
-			 unsigned long *sizep)
+			 size_t *sizep)
 {
 	enum object_type type;
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -730,7 +730,7 @@ int odb_read_object_info(struct object_database *odb,
 }
 
 int odb_pretend_object(struct object_database *odb,
-		       void *buf, unsigned long len, enum object_type type,
+		       void *buf, size_t len, enum object_type type,
 		       struct object_id *oid)
 {
 	hash_object_file(odb->repo->hash_algo, buf, len, type, oid);
@@ -744,7 +744,7 @@ int odb_pretend_object(struct object_database *odb,
 void *odb_read_object(struct object_database *odb,
 		      const struct object_id *oid,
 		      enum object_type *type,
-		      unsigned long *size)
+		      size_t *size)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	unsigned flags = OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE;
@@ -762,12 +762,12 @@ void *odb_read_object(struct object_database *odb,
 void *odb_read_object_peeled(struct object_database *odb,
 			     const struct object_id *oid,
 			     enum object_type required_type,
-			     unsigned long *size,
+			     size_t *size,
 			     struct object_id *actual_oid_return)
 {
 	enum object_type type;
 	void *buffer;
-	unsigned long isize;
+	size_t isize;
 	struct object_id actual_oid;
 
 	oidcpy(&actual_oid, oid);
diff --git a/odb.h b/odb.h
index 73553ed5a7..e2f0bbad25 100644
--- a/odb.h
+++ b/odb.h
@@ -228,12 +228,12 @@ struct odb_source *odb_add_to_alternates_memory(struct object_database *odb,
 void *odb_read_object(struct object_database *odb,
 		      const struct object_id *oid,
 		      enum object_type *type,
-		      unsigned long *size);
+		      size_t *size);
 
 void *odb_read_object_peeled(struct object_database *odb,
 			     const struct object_id *oid,
 			     enum object_type required_type,
-			     unsigned long *size,
+			     size_t *size,
 			     struct object_id *oid_ret);
 
 /*
@@ -245,13 +245,13 @@ void *odb_read_object_peeled(struct object_database *odb,
  * that reference it.
  */
 int odb_pretend_object(struct object_database *odb,
-		       void *buf, unsigned long len, enum object_type type,
+		       void *buf, size_t len, enum object_type type,
 		       struct object_id *oid);
 
 struct object_info {
 	/* Request */
 	enum object_type *typep;
-	unsigned long *sizep;
+	size_t *sizep;
 	off_t *disk_sizep;
 	struct object_id *delta_base_oid;
 	void **contentp;
@@ -356,7 +356,7 @@ int odb_read_object_info_extended(struct object_database *odb,
  */
 int odb_read_object_info(struct object_database *odb,
 			 const struct object_id *oid,
-			 unsigned long *sizep);
+			 size_t *sizep);
 
 enum odb_has_object_flags {
 	/* Retry packed storage after checking packed and loose storage */
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 7d7ea2fb84..66e6bb8d3f 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -72,7 +72,7 @@ static int read_object_info_from_path(struct odb_source_loose *loose,
 	void *map = NULL;
 	git_zstream stream, *stream_to_end = NULL;
 	char hdr[MAX_HEADER_LEN];
-	unsigned long size_scratch;
+	size_t size_scratch;
 	enum object_type type_scratch;
 	struct stat st;
 
@@ -355,7 +355,6 @@ static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct odb_loose_read_stream *st;
 	unsigned long mapsize;
-	unsigned long size_ul;
 	void *mapped;
 
 	mapped = odb_source_loose_map_object(loose, oid, &mapsize);
@@ -379,18 +378,11 @@ static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 		goto error;
 	}
 
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * st->base.size is size_t (64-bit). Use temporary variable.
-	 * Note: loose objects >4GB would still truncate here, but such
-	 * large loose objects are uncommon (they'd normally be packed).
-	 */
-	oi.sizep = &size_ul;
+	oi.sizep = &st->base.size;
 	oi.typep = &st->base.type;
 
 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
 		goto error;
-	st->base.size = size_ul;
 
 	st->mapped = mapped;
 	st->mapsize = mapsize;
diff --git a/odb/streaming.c b/odb/streaming.c
index 7602a8d5d8..20531e864c 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -157,26 +157,15 @@ static int open_istream_incore(struct odb_read_stream **out,
 		.base.read = read_istream_incore,
 	};
 	struct odb_incore_read_stream *st;
-	unsigned long size_ul;
 	int ret;
 
 	oi.typep = &stream.base.type;
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * stream.base.size is size_t (64-bit). We use a temporary variable
-	 * because the types are incompatible. Note: this path still truncates
-	 * for >4GB objects, but large objects should use pack streaming
-	 * (packfile_store_read_object_stream) which handles size_t properly.
-	 * This incore fallback is only used for small objects or when pack
-	 * streaming is unavailable.
-	 */
-	oi.sizep = &size_ul;
+	oi.sizep = &stream.base.size;
 	oi.contentp = (void **)&stream.buf;
 	ret = odb_read_object_info_extended(odb, oid, &oi,
 					    OBJECT_INFO_DIE_IF_CORRUPT);
 	if (ret)
 		return ret;
-	stream.base.size = size_ul;
 
 	CALLOC_ARRAY(st, 1);
 	*st = stream;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index f9af8a96bd..e8a82945cc 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1856,7 +1856,7 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
 static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 				     uint32_t pos)
 {
-	unsigned long size;
+	size_t size;
 	struct object_info oi = OBJECT_INFO_INIT;
 
 	oi.sizep = &size;
@@ -1891,7 +1891,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
 			die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
 	}
 
-	return size;
+	return cast_size_t_to_ulong(size);
 }
 
 static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
diff --git a/packfile.c b/packfile.c
index c174982d10..78c389e6f3 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1607,13 +1607,10 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 	 * a "real" type later if the caller is interested.
 	 */
 	if (oi->contentp) {
-		size_t size_st = 0;
 		*oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset,
-						      &size_st, &type);
+						      oi->sizep, &type);
 		if (!*oi->contentp)
 			type = OBJ_BAD;
-		else if (oi->sizep)
-			*oi->sizep = cast_size_t_to_ulong(size_st);
 	} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
 		type = unpack_object_header(p, &w_curs, &curpos, &size);
 	}
@@ -1633,7 +1630,7 @@ static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_off
 				goto out;
 			}
 		}
-		*oi->sizep = (unsigned long)size;
+		*oi->sizep = size;
 	}
 
 	if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
@@ -1919,7 +1916,6 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 			struct object_id base_oid;
 			if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
 				struct object_info oi = OBJECT_INFO_INIT;
-				unsigned long bsz_ul = 0;
 
 				nth_packed_object_id(&base_oid, p,
 						     pack_pos_to_index(p, pos));
@@ -1930,13 +1926,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
 				mark_bad_packed_object(p, &base_oid);
 
 				oi.typep = &type;
-				oi.sizep = &bsz_ul;
+				oi.sizep = &base_size;
 				oi.contentp = &base;
 				if (odb_read_object_info_extended(r->objects, &base_oid,
 								  &oi, 0) < 0)
 					base = NULL;
-				else
-					base_size = bsz_ul;
 
 				external_base = base;
 			}
diff --git a/path-walk.c b/path-walk.c
index 94ff90bd15..edc8e736d7 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -368,7 +368,7 @@ static int walk_path(struct path_walk_context *ctx,
 		struct oid_array filtered = OID_ARRAY_INIT;
 
 		for (size_t i = 0; i < list->oids.nr; i++) {
-			unsigned long size;
+			size_t size;
 
 			if (odb_read_object_info(ctx->repo->objects,
 						 &list->oids.oid[i],
diff --git a/protocol-caps.c b/protocol-caps.c
index 35072ed60b..8858ea4489 100644
--- a/protocol-caps.c
+++ b/protocol-caps.c
@@ -50,7 +50,7 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 	for_each_string_list_item (item, oid_str_list) {
 		const char *oid_str = item->string;
 		struct object_id oid;
-		unsigned long object_size;
+		size_t object_size;
 
 		if (get_oid_hex_algop(oid_str, &oid, r->hash_algo) < 0) {
 			packet_writer_error(
@@ -66,7 +66,8 @@ static void send_info(struct repository *r, struct packet_writer *writer,
 			if (odb_read_object_info(r->objects, &oid, &object_size) < 0) {
 				strbuf_addstr(&send_buffer, " ");
 			} else {
-				strbuf_addf(&send_buffer, " %lu", object_size);
+				strbuf_addf(&send_buffer, " %"PRIuMAX,
+					    (uintmax_t)object_size);
 			}
 		}
 
diff --git a/read-cache.c b/read-cache.c
index 21829102ae..21ca58beea 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -250,7 +250,7 @@ static int ce_compare_link(const struct cache_entry *ce, size_t expected_size)
 {
 	int match = -1;
 	void *buffer;
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 	struct strbuf sb = STRBUF_INIT;
 
@@ -3462,7 +3462,7 @@ void *read_blob_data_from_index(struct index_state *istate,
 				const char *path, unsigned long *size)
 {
 	int pos, len;
-	unsigned long sz;
+	size_t sz;
 	enum object_type type;
 	void *data;
 
@@ -3490,7 +3490,7 @@ void *read_blob_data_from_index(struct index_state *istate,
 		return NULL;
 	}
 	if (size)
-		*size = sz;
+		*size = cast_size_t_to_ulong(sz);
 	return data;
 }
 
diff --git a/ref-filter.c b/ref-filter.c
index 1da4c0e60d..8ba91c72a1 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -86,7 +86,7 @@ struct ref_trailer_buf {
 static struct expand_data {
 	struct object_id oid;
 	enum object_type type;
-	unsigned long size;
+	size_t size;
 	off_t disk_size;
 	struct object_id delta_base_oid;
 	void *content;
diff --git a/reflog.c b/reflog.c
index 82337078d0..04edbe5670 100644
--- a/reflog.c
+++ b/reflog.c
@@ -154,7 +154,7 @@ static int tree_is_complete(const struct object_id *oid)
 
 	if (!tree->buffer) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 		void *data = odb_read_object(the_repository->objects, oid,
 					     &type, &size);
 		if (!data) {
diff --git a/rerere.c b/rerere.c
index 0296700f9f..068321b24f 100644
--- a/rerere.c
+++ b/rerere.c
@@ -990,7 +990,7 @@ static int handle_cache(struct index_state *istate,
 
 	while (pos < istate->cache_nr) {
 		enum object_type type;
-		unsigned long size;
+		size_t size;
 
 		ce = istate->cache[pos++];
 		if (ce_namelen(ce) != len || memcmp(ce->name, path, len))
diff --git a/submodule-config.c b/submodule-config.c
index a81897b4e0..f75997402a 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -694,7 +694,7 @@ static const struct submodule *config_from(struct submodule_cache *cache,
 		enum lookup_type lookup_type)
 {
 	struct strbuf rev = STRBUF_INIT;
-	unsigned long config_size;
+	size_t config_size;
 	char *config = NULL;
 	struct object_id oid;
 	enum object_type type;
diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c
index c493b75e02..840797cf0d 100644
--- a/t/helper/test-pack-deltas.c
+++ b/t/helper/test-pack-deltas.c
@@ -48,7 +48,8 @@ static void write_ref_delta(struct hashfile *f,
 			    struct object_id *base)
 {
 	unsigned char header[MAX_PACK_OBJECT_HEADER];
-	unsigned long size, base_size, delta_size, compressed_size, hdrlen;
+	unsigned long delta_size, compressed_size, hdrlen;
+	size_t size, base_size;
 	enum object_type type;
 	void *base_buf, *delta_buf;
 	void *buf = odb_read_object(the_repository->objects,
diff --git a/t/helper/test-partial-clone.c b/t/helper/test-partial-clone.c
index a7aab426d0..87c59108e0 100644
--- a/t/helper/test-partial-clone.c
+++ b/t/helper/test-partial-clone.c
@@ -17,7 +17,7 @@ static void object_info(const char *gitdir, const char *oid_hex)
 {
 	struct repository r;
 	struct object_id oid;
-	unsigned long size;
+	size_t size;
 	struct object_info oi = {.sizep = &size};
 	const char *p;
 
diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c
index 482502ef4b..6844bfc37c 100644
--- a/t/unit-tests/u-odb-inmemory.c
+++ b/t/unit-tests/u-odb-inmemory.c
@@ -20,7 +20,7 @@ static void cl_assert_object_info(struct odb_source_inmemory *source,
 				  const char *expected_content)
 {
 	enum object_type actual_type;
-	unsigned long actual_size;
+	size_t actual_size;
 	void *actual_content;
 	struct object_info oi = {
 		.typep = &actual_type,
diff --git a/tag.c b/tag.c
index 2f12e51024..1a00ded6eb 100644
--- a/tag.c
+++ b/tag.c
@@ -49,7 +49,7 @@ int gpg_verify_tag(struct repository *r, const struct object_id *oid,
 {
 	enum object_type type;
 	char *buf;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	type = odb_read_object_info(r->objects, oid, NULL);
@@ -207,7 +207,7 @@ int parse_tag(struct repository *r, struct tag *item)
 {
 	enum object_type type;
 	void *data;
-	unsigned long size;
+	size_t size;
 	int ret;
 
 	if (item->object.parsed)
diff --git a/tree-walk.c b/tree-walk.c
index 7e1b956f27..a67f06b9eb 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -87,7 +87,7 @@ void *fill_tree_descriptor(struct repository *r,
 			   struct tree_desc *desc,
 			   const struct object_id *oid)
 {
-	unsigned long size = 0;
+	size_t size = 0;
 	void *buf = NULL;
 
 	if (oid) {
@@ -610,7 +610,7 @@ int get_tree_entry(struct repository *r,
 {
 	int retval;
 	void *tree;
-	unsigned long size;
+	size_t size;
 	struct object_id root;
 
 	tree = odb_read_object_peeled(r->objects, tree_oid, OBJ_TREE, &size, &root);
@@ -682,7 +682,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 		if (!t.buffer) {
 			void *tree;
 			struct object_id root;
-			unsigned long size;
+			size_t size;
 			tree = odb_read_object_peeled(r->objects, &current_tree_oid,
 						      OBJ_TREE, &size, &root);
 			if (!tree)
@@ -778,6 +778,7 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 		} else if (S_ISLNK(*mode)) {
 			/* Follow a symlink */
 			unsigned long link_len;
+			size_t link_len_st = 0;
 			size_t len;
 			char *contents, *contents_start;
 			struct dir_state *parent;
@@ -797,7 +798,8 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
 
 			contents = odb_read_object(r->objects,
 						   &current_tree_oid, &type,
-						   &link_len);
+						   &link_len_st);
+			link_len = cast_size_t_to_ulong(link_len_st);
 
 			if (!contents)
 				goto done;
diff --git a/tree.c b/tree.c
index d703ab97c8..53f7395e9f 100644
--- a/tree.c
+++ b/tree.c
@@ -188,7 +188,7 @@ int repo_parse_tree_gently(struct repository *r, struct tree *item,
 {
 	 enum object_type type;
 	 void *buffer;
-	 unsigned long size;
+	 size_t size;
 
 	if (item->object.parsed)
 		return 0;
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 5ee2b96d0a..db6938689f 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -179,7 +179,7 @@ int read_mmfile(mmfile_t *ptr, const char *filename)
 void read_mmblob(mmfile_t *ptr, struct object_database *odb,
 		 const struct object_id *oid)
 {
-	unsigned long size;
+	size_t size;
 	enum object_type type;
 
 	if (is_null_oid(oid)) {
-- 
gitgitgadget

^ permalink raw reply related

* [PATCH] gitlab-ci: migrate Windows builds away from Chocolatey
From: Patrick Steinhardt @ 2026-06-15 12:21 UTC (permalink / raw)
  To: git

The Windows builds in GitLab CI use Chocolatey to install dependencies.
Unfortunately, Chocolatey seems to be very unreliable, which causes the
jobs to fail very regularly. This is a limitation that seems to be
somewhat known [1]:

  As an organization, you want 100% reliability (or at least that
  potential), and you may want full trust and control as well. This is
  something you can get with internally hosted packages, and you are
  unlikely to achieve from use of the Community Package Repository.

So using the Community Package Repository is kind of discouraged in case
one wants reliability. We _do_ want reliability though, and we cannot
easily switch to an enterprise license to fix this issue.

Introduce a new script that downloads and installs dependencies
directly. This has a couple of benefits:

  - We can drop our dependency on Chocolatey completely, thus improving
    reliability.

  - We can easily cache the installers.

  - We get direct control over the exact versions we install.

  - Installing dependencies is sped up from roughly 3 minutes to 1
    minute.

[1]: https://docs.chocolatey.org/en-us/community-repository/community-packages-disclaimer/#summary

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
Hi

I've been quite annoyed recently because our Windows builds in GitLab CI
are extremely flakey. All of those flakes come from Chocolatey, which is
why this patch moves away from it.

Thanks!

Patrick
---
 .gitlab-ci.yml              | 11 ++++++---
 ci/install-dependencies.ps1 | 55 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e0b9a0d82b..87a5343a94 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -161,11 +161,16 @@ test:mingw64:
     TEST_OUTPUT_DIRECTORY: "C:/Git-Test"
   tags:
     - saas-windows-medium-amd64
+  cache:
+    key:
+      files:
+        - ci/install-dependencies.ps1
+    paths:
+      - .dependencies
   before_script:
     - *windows_before_script
-    - choco install -y git meson ninja rust-ms
-    - Import-Module $env:ChocolateyInstall\helpers\chocolateyProfile.psm1
-    - refreshenv
+    - ./ci/install-dependencies.ps1
+    - $env:Path = "C:\Meson;C:\Rust\bin;$env:Path"
     - New-Item -Path $env:TEST_OUTPUT_DIRECTORY -ItemType Directory
 
 build:msvc-meson:
diff --git a/ci/install-dependencies.ps1 b/ci/install-dependencies.ps1
new file mode 100755
index 0000000000..e3b367fa54
--- /dev/null
+++ b/ci/install-dependencies.ps1
@@ -0,0 +1,55 @@
+param(
+    [string]$DownloadDirectory = '.dependencies'
+)
+
+$ErrorActionPreference = 'Stop'
+$ProgressPreference = 'SilentlyContinue'
+
+$GitVersion = '2.54.0.windows.1'
+$MesonVersion = '1.11.0'
+$RustVersion = '1.96.0'
+
+New-Item -Path $DownloadDirectory -ItemType Directory -Force | Out-Null
+New-Item -Path .git/info -ItemType Directory -Force | Out-Null
+New-Item -Path .git/info/exclude -ItemType File -Force | Out-Null
+Add-Content -Path .git/info/exclude -Value "/$DownloadDirectory"
+
+function Get-Installer {
+    param(
+        [Parameter(Mandatory = $true)][string]$Name,
+        [Parameter(Mandatory = $true)][string]$Url
+    )
+
+    $path = Join-Path $DownloadDirectory $Name
+    if (-not (Test-Path $path)) {
+        Write-Host "Downloading $Url"
+        Invoke-WebRequest $Url -OutFile $path -TimeoutSec 300
+    }
+    return $path
+}
+
+function Invoke-Installer {
+    param(
+        [Parameter(Mandatory = $true)][string]$FilePath,
+        [Parameter(Mandatory = $true)][string[]]$ArgumentList
+    )
+
+    Write-Host "Running $FilePath $($ArgumentList -join ' ')"
+    $process = Start-Process -Wait -PassThru -FilePath $FilePath -ArgumentList $ArgumentList
+    if ($process.ExitCode -ne 0) {
+        throw "$FilePath failed with exit code $($process.ExitCode)"
+    }
+}
+
+$gitAssetVersion = $GitVersion -replace '\.windows\.\d+$', ''
+$gitInstaller = Get-Installer "Git-Installer.exe" `
+    "https://github.com/git-for-windows/git/releases/download/v$GitVersion/PortableGit-$gitAssetVersion-64-bit.7z.exe"
+Invoke-Installer $gitInstaller @('-y', '-o"C:\Program Files\Git"')
+
+$mesonMsi = Get-Installer "meson.msi" `
+    "https://github.com/mesonbuild/meson/releases/download/$MesonVersion/meson-$MesonVersion-64.msi"
+Invoke-Installer msiexec.exe @('/i', $mesonMsi, 'INSTALLDIR=C:\Meson', '/quiet', '/norestart')
+
+$rustMsi = Get-Installer "rust.msi" `
+    "https://static.rust-lang.org/dist/rust-$RustVersion-x86_64-pc-windows-msvc.msi"
+Invoke-Installer msiexec.exe @('/i', $rustMsi, 'INSTALLDIR=C:\Rust', 'ADDLOCAL=Rustc,Cargo,Std', '/quiet', '/norestart')

---
base-commit: ea97ad8d017de0c9037451a78008a0fd60abea0c
change-id: 20260615-b4-pks-gitlab-ci-drop-chocolatey-bfe9d4bb1442


^ permalink raw reply related

* Re: [PATCH 2/9] setup: stop applying repository format twice
From: Patrick Steinhardt @ 2026-06-15 12:36 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git
In-Reply-To: <CAOLa=ZQC7YCBxjxkbm8qcWqpNFgAKNpvw9B6t=+XnX4bbkGq0Q@mail.gmail.com>

On Fri, Jun 12, 2026 at 02:00:20AM -0700, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > When discovering the repository in "setup.c" we apply the final
> > repository format multiple times:
> >
> >   - Once via `repository_format_configure()`, where we configure the
> >     repository format for both `struct repository_format` and `struct
> >     repository`.
> >
> >   - And once via `apply_repository_format()`, where we then apply the
> >     `struct repository_format` to the `struct repository` again.
> >
> 
> Okay so we're talking applying the repository format to the `struct
> repository` specifically.
> 
> > As the format will be applied to the repository when applying the format
> > it's thus somewhat unnecessary to also apply it to the repository when
> > adapting the discovered format.
> 
> This was a bit confusing to read at first. Okay since we already apply
> the format in the second step, the first is not necessary.

I agree. I'll rephrase this a bit.

Patrick

^ permalink raw reply

* Re: [PATCH 4/9] refs: unregister reference stores from "chdir_notify"
From: Patrick Steinhardt @ 2026-06-15 12:36 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git
In-Reply-To: <CAOLa=ZS_0b9o2YucgA6Se_Mq4nLo1Luow7adTLAifbkF9jpUrA@mail.gmail.com>

On Fri, Jun 12, 2026 at 02:18:28AM -0700, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
[snip]
> > We never noticed either of these symptoms, but they are obviously bad.
> >
> > Partially fix those issues by unregistering the reference stores when
> > releasing them. The leak of the main reference database will be fixed in
> > a subsequent commit.
> >
> > Note that this requires us to use `chdir_notify_register()` instead of
> > `chdir_notify_parent()`, as there is no infrastructure to unregister the
> 
> Shouldn't this be s/chdir_notify_parent/chdir_notify_reparent ?

Yup, good catch.

Patrick

^ permalink raw reply

* Re: [PATCH 9/9] refs: always use absolute paths for reference stores
From: Patrick Steinhardt @ 2026-06-15 12:36 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: git
In-Reply-To: <CAOLa=ZR60bhH4z9ZoKTCn97QzautcihxPbTZ=_e0raMTjzajZQ@mail.gmail.com>

On Fri, Jun 12, 2026 at 02:58:19AM -0700, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > Both the "files" and "reftable" backends use
> > `refs_compute_filesystem_location()` to figure out the location of both
> > the git and common directories. Depending on how the function is called
> > we may or may not return an absolute path.
> >
> > There isn't really a good reason to use relative paths though. Quite on
> > the contrary, because we sometimes use relative paths we are forced to
> > register for chdir(3p) notifications via `chdir_notify_reparent()`.
> >
> 
> With the previous changes added, we register via
> `chdir_notify_register()`
> 
> > Adapt the function to always return absolute paths. This results in a
> > user-visible change in behaviour where we now unconditionally print
> > absolute paths in error messages. But arguably, that change in behaviour
> > is acceptable and may even be good in cases where a Git command may end
> > up accessing references across multiple different repositories.
> >
> > Furthermore, drop the calls to `chdir_notify_reparent()`, which aren't
> > required anymore now that the paths are always absolute.
> >
> 
> Same here, should be `chdir_notify_register()`

Yes, will fix.

Patrick

^ permalink raw reply

* Re: [PATCH 0/9] refs: stop using `chdir_notify_reparent()`
From: Patrick Steinhardt @ 2026-06-15 12:36 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Karthik Nayak
In-Reply-To: <20260613140024.GA766297@coredump.intra.peff.net>

On Sat, Jun 13, 2026 at 10:00:24AM -0400, Jeff King wrote:
> On Fri, Jun 12, 2026 at 08:18:16AM +0200, Patrick Steinhardt wrote:
> 
> > > If we move to a world of all absolute paths where chdir-notify is not
> > > necessary, will we lose that optimization?
> > 
> > Probably. Unfortunately, the commit doesn't have any repeatable
> > benchmarks in there, so it's hard to say whether we could still
> > reproduce those issues or not.
> 
> Here's an easy-ish reproduction specific to the ref code:
> 
>   rm -rf a/
>   dir=$(perl -e 'print "a/" x 1024')
>   mkdir -p $dir &&
>   cd $dir &&
>   git init &&
>   git commit --allow-empty -m foo &&
>   seq -f 'create refs/heads/foo%05g HEAD' 10000 |
>   git update-ref --stdin &&
>   time git show-ref
> 
> Before your series, I get timings like this:
> 
>   real	0m0.078s
>   user	0m0.020s
>   sys	0m0.057s
> 
> After, I get:
> 
>   real	0m0.876s
>   user	0m0.004s
>   sys	0m0.872s
> 
> So it really is measurable (and I did not expect the effect to be nearly
> so large). Unsurprisingly the extra CPU goes to system time.

This is indeed surprisingly bad.

> But obviously that case is quite silly. It's an absurdly deep hierarchy,
> and 10,000 loose refs is a lot. Just running "git pack-refs --all"
> brings the before/after to roughly the same timings (around 40ms --
> faster even than the before timing).
> 
> So it _can_ matter, but I think ultimately the better direction is
> probably "make fewer syscalls". Which we do via packfiles, and via
> packed-refs, and eventually via reftables, all of which put more data
> into a single file.
> 
> I offer the script above more as food for thought, and not necessarily
> an argument against your series.

Hum, yeah. I'm a bit hesitant to just wave your findings away. I mean I
agree with you that it's unlikely to really matter in practice. But you
never really know, and I'm not sure that I consider dropping the chdir
infra important enough to knowingly take that hit.

I definitely think that we should merge the remainder of this series
though, as these patches simplify "setup.c" and fix a couple of memory
leaks. But maybe we drop the last patch for now and...

> > Ideally, we'd have the best of both worlds: absolute paths everywhere
> > without the performance hit. A while back I had a discussion with
> > Torvalds on the security mailing list around this issue, and ultimately
> > the conclusion was that the best way forward would be to use openat(3p).
> > 
> > This wouldn't only allow us to optimize cases like this, but it also has
> > the added benefit that we're much less prone to TOCTOU-style issues and
> > we might even be able to use flags like O_BENEATH. So it would basically
> > be win-win. The only problem is of course that Windows doesn't have
> > openat(3p), so we'd have to emulate it, and that's where I always lost
> > the desire to do this.
> > 
> > When waking up this morning though I had the thought that we shouldn't
> > try to emulate openat(3p) directly, but instead create a higher-level
> > interface.
> > [...]
> 
> Yeah, I think given a decent interface it might not be so bad. It would
> mean code thinking about filesystem syscalls in a different way, but if
> done subsystem-by-subsystem it might be OK to do incrementally. Much of
> the code that would want to switch to this is using repo_git_path() or
> similar already (and getting rid of those remaining static-buffer
> functions would be a nice bonus).
> 
> I do wonder if your series here to move to absolute paths makes the
> TOCTOU situation a little worse. With a relative path, once we are
> "inside" the repo then we are only susceptible to changes within it.
> Whereas with an absolute path, if one of the intermediate paths changes
> from under us, there may be confusion.
> 
> Without thinking on it too hard, though, I'd guess if any such case is a
> security problem, it already was during the "open" part (because it
> implies that the attacker controls paths below you in the hierarchy, and
> you had to get to your cwd _somehow_, at which point they could have
> attacked you then).

... eventually give this idea here a test?

Patrick

^ permalink raw reply

* Re: [PATCH v5 06/10] reset: introduce ability to skip updating HEAD
From: Patrick Steinhardt @ 2026-06-15 12:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Pablo Sabater, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <xmqq33ytneiu.fsf@gitster.g>

On Thu, Jun 11, 2026 at 11:00:25AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > Note that in a previous iteration we instead introduced a flag that made
> > callers opt out of updating any references. This was somewhat awkward
> > though because we already have the `UPDATE_ORIG_HEAD` flag, so the
> > result was somewhat inconsistent.
> >
> > Suggested-by: Phillip Wood <phillip.wood123@gmail.com>
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> >  builtin/rebase.c | 14 ++++++++++----
> >  reset.c          |  9 +++++++--
> >  reset.h          |  9 ++++++---
> >  sequencer.c      |  4 +++-
> >  4 files changed, 26 insertions(+), 10 deletions(-)
> >
> > diff --git a/reset.c b/reset.c
> > ...
> > @@ -129,7 +133,7 @@ int reset_working_tree(struct repository *r,
> >  		oid = &head_oid;
> >  
> >  	if (refs_only) {
> > -		if (!dry_run)
> > +		if (update_head)
> >  			return update_refs(r, opts, oid, head);
> >  		return 0;
> >  	}
> 
> So when refs_only and update-head are in effect, we will call
> update_refs(), even if dry_run is given.  update_refs() does not
> seem to pay attention to (opts->flags & RESET_WORKING_TREE_DRY_RUN)
> at all, so wouldn't this mean that we would update even in a dry-run
> session?

Ugh, good catch, this is obviously wrong. Will fix.

Patrick

^ permalink raw reply

* Re: [PATCH v3 1/3] MyFirstContribution: recommend shallow threading of cover letters
From: Patrick Steinhardt @ 2026-06-15 12:58 UTC (permalink / raw)
  To: Karthik Nayak
  Cc: git, Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
	SZEDER Gábor, Kristoffer Haugsbakk, Toon Claes
In-Reply-To: <CAOLa=ZQE-kkpSX=pP2A6SXdbp_O6AHzRmbUDOtKCsvz2Yz66Ng@mail.gmail.com>

On Wed, Jun 10, 2026 at 07:08:33AM -0400, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > The "MyFirstContribution" document recommends the use of deep threading
> > of cover letters: every cover letter of subsequent iterations shall be
> > linked to the cover letter of the preceding version. The result of this
> > is that eventually, threads with many versions are getting nested so
> > deep that it becomes hard to follow.
> >
> > Adapt the recommendation to instead propose shallow threading of cover
> > letters: instead of linking the cover letter to the previous cover
> > letter, the user is supposed to always link it to the first cover
> > letter. This still makes it easy to follow the iterations, but has the
> > benefit of nesting to a much shallower level.
> 
> Should we also modify 'Documentation/SubmittingPatches'? Which states:
> 
>   All subsequent versions of a patch series and other related patches
>   should be grouped into their own e-mail thread to help readers find
>   all parts of the series.  To that end, send them as replies to either
>   an additional "cover letter" message (see below), the first patch, or
>   the respective preceding patch. Here is a
>   link:MyFirstContribution.html#v2-git-send-email[step-by-step guide] on
>   how to submit updated versions of a patch series.
> 
> Personally, I find it a bit awkward when new versions are sent as a new
> separate thread, especially when the subject is changed over versions.

I don't necessarily see this as contradicting advice, I rather read it
as "patches of vN+1 should have their own subthread". But it certainly
is confusingly written, and I'm not even sure myself whether I'm reading
it correctly or not.

I kind of feel like this is a bit outside the scope of this series. Also
because I'm not 100% sure how to reword this to make it read nicer :)
But I'm very happy to accept suggestions here.

Patrick

^ permalink raw reply

* Re: [PATCH v3 3/3] b4: introduce configuration for the Git project
From: Patrick Steinhardt @ 2026-06-15 12:58 UTC (permalink / raw)
  To: Karthik Nayak
  Cc: git, Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
	SZEDER Gábor, Kristoffer Haugsbakk, Toon Claes
In-Reply-To: <CAOLa=ZQxA52p+9DcZZ=gVTqZ66ETQvZRQYjZNFjzdbsPwTW2iQ@mail.gmail.com>

On Wed, Jun 10, 2026 at 07:13:33AM -0400, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > We're about to extend our documentation to recommend b4 for sending
> 
> Nit: This is in the past now

True, will fix.

Patrick

^ permalink raw reply

* [PATCH v4 0/3] Documentation: recommend the use of b4
From: Patrick Steinhardt @ 2026-06-15 12:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
	SZEDER Gábor, Kristoffer Haugsbakk, Toon Claes,
	Karthik Nayak
In-Reply-To: <20260602-pks-b4-v1-0-a7ae5a49e9cf@pks.im>

Hi,

this small patch series wires up b4 in Git and recommends the use
thereof via "MyFirstContribution", as discussed in [1].

Changes in v4:
  - Improve a commit message.
  - Link to v3: https://patch.msgid.link/20260608-pks-b4-v3-0-f5e497d10c56@pks.im

Changes in v3:
  - I wasn't really able to judge consensus one way or the other
    regarding the deep vs shallow nesting of cover letters, so I still
    have the change to shallow nesting of cover letters part of this
    series. If we continue to be split on this one (or if we favor the
    current status quo) I'm happy to drop the first patch and adapt the
    last patch to use deep nesting of cover letters instead.
  - Hopefully fix some confusion by saying "shallow/deep threading of
    cover letters".
  - Fix some more instances where we recommend deep threading of cover
    letters.
  - Link to v2: https://patch.msgid.link/20260603-pks-b4-v2-0-a8aea0aa2c23@pks.im

Changes in v2:
  - Reorder commits so that the b4 docs are added first.
  - Add a section that highlights how to configure b4, and that points
    out that the per-project defaults can be overridden via Git
    configuration.
  - Add a patch to MyFirstContribution that recommends shallow
    threading. I mostly intend this to be a discussion starter so that
    the `.b4-config` file matches our preferred threading style.
  - Fix a typo.
  - Link to v1: https://patch.msgid.link/20260602-pks-b4-v1-0-a7ae5a49e9cf@pks.im

Thanks!

Patrick

[1]: <xmqqik81xpqx.fsf@gitster.g>

---
Patrick Steinhardt (3):
      MyFirstContribution: recommend shallow threading of cover letters
      MyFirstContribution: recommend the use of b4
      b4: introduce configuration for the Git project

 .b4-config                             |   6 ++
 .b4-cover-template                     |  11 ++++
 Documentation/MyFirstContribution.adoc | 100 ++++++++++++++++++++++++++++++---
 Documentation/SubmittingPatches        |   6 +-
 4 files changed, 114 insertions(+), 9 deletions(-)

Range-diff versus v3:

1:  1aec56f76c = 1:  b6b488e6a8 MyFirstContribution: recommend shallow threading of cover letters
2:  f2036769bd = 2:  1a68b993d2 MyFirstContribution: recommend the use of b4
3:  fb522c7d90 ! 3:  5bc8fba96a b4: introduce configuration for the Git project
    @@ Metadata
      ## Commit message ##
         b4: introduce configuration for the Git project
     
    -    We're about to extend our documentation to recommend b4 for sending
    -    patch series to the mailing list. Prepare for this by introducing a b4
    -    configuration so that the tool knows to honor our preferences. For now,
    -    this configuration does two things:
    +    In the preceding commit we have extended our documentation to recommend
    +    b4 for sending patch series to the mailing list. Introduce configuration
    +    so that it knows to honor preferences of the Git project by default. For
    +    now, this configuration does two things:
     
           - It configures "send-same-thread = shallow", which tells b4 to always
             send subsequent versions of the same patch series as a reply to the

---
base-commit: 9ac3f193c05c2237e2b14ebaa1149e9fc8a1abe0
change-id: 20260602-pks-b4-31cc20d7f84b


^ permalink raw reply

* [PATCH v4 1/3] MyFirstContribution: recommend shallow threading of cover letters
From: Patrick Steinhardt @ 2026-06-15 12:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
	SZEDER Gábor, Kristoffer Haugsbakk, Toon Claes,
	Karthik Nayak
In-Reply-To: <20260615-pks-b4-v4-0-22cfca8f19c5@pks.im>

The "MyFirstContribution" document recommends the use of deep threading
of cover letters: every cover letter of subsequent iterations shall be
linked to the cover letter of the preceding version. The result of this
is that eventually, threads with many versions are getting nested so
deep that it becomes hard to follow.

Adapt the recommendation to instead propose shallow threading of cover
letters: instead of linking the cover letter to the previous cover
letter, the user is supposed to always link it to the first cover
letter. This still makes it easy to follow the iterations, but has the
benefit of nesting to a much shallower level.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Documentation/MyFirstContribution.adoc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/MyFirstContribution.adoc b/Documentation/MyFirstContribution.adoc
index b9fdefce02..984b7f5aa8 100644
--- a/Documentation/MyFirstContribution.adoc
+++ b/Documentation/MyFirstContribution.adoc
@@ -790,7 +790,7 @@ We can note a few things:
   v3", etc. in place of "PATCH". For example, "[PATCH v2 1/3]" would be the first of
   three patches in the second iteration. Each iteration is sent with a new cover
   letter (like "[PATCH v2 0/3]" above), itself a reply to the cover letter of the
-  previous iteration (more on that below).
+  first iteration (more on that below).
 
 NOTE: A single-patch topic is sent with "[PATCH]", "[PATCH v2]", etc. without
 _i_/_n_ numbering (in the above thread overview, no single-patch topic appears,
@@ -1214,7 +1214,7 @@ between your last version and now, if it's something significant. You do not
 need the exact same body in your second cover letter; focus on explaining to
 reviewers the changes you've made that may not be as visible.
 
-You will also need to go and find the Message-ID of your previous cover letter.
+You will also need to go and find the Message-ID of your first cover letter.
 You can either note it when you send the first series, from the output of `git
 send-email`, or you can look it up on the
 https://lore.kernel.org/git[mailing list]. Find your cover letter in the
@@ -1227,8 +1227,8 @@ Message-ID: <foo.12345.author@example.com>
 
 Your Message-ID is `<foo.12345.author@example.com>`. This example will be used
 below as well; make sure to replace it with the correct Message-ID for your
-**previous cover letter** - that is, if you're sending v2, use the Message-ID
-from v1; if you're sending v3, use the Message-ID from v2.
+**first cover letter** - that is, for any subsequent version that you send,
+always use the Message-ID from v1.
 
 While you're looking at the email, you should also note who is CC'd, as it's
 common practice in the mailing list to keep all CCs on a thread. You can add

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v4 2/3] MyFirstContribution: recommend the use of b4
From: Patrick Steinhardt @ 2026-06-15 12:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
	SZEDER Gábor, Kristoffer Haugsbakk, Toon Claes,
	Karthik Nayak
In-Reply-To: <20260615-pks-b4-v4-0-22cfca8f19c5@pks.im>

The b4 tool originates from the Linux kernel community and is intended
to help mailing-list based workflows. It automates a lot of the annoying
bookkeeping tasks that contributors typically need to do: tracking the
list of recipients, Message-IDs, range-diffs and the like. In addition
to that, b4 also has many other subcommands that help the maintainer and
reviewers.

The Git project uses the same infrastructure as the kernel, so this tool
is also a very good fit for us. Adapt "MyFirstContribution" to
explicitly recommend its use.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Documentation/MyFirstContribution.adoc | 92 ++++++++++++++++++++++++++++++++--
 Documentation/SubmittingPatches        |  6 ++-
 2 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/Documentation/MyFirstContribution.adoc b/Documentation/MyFirstContribution.adoc
index 984b7f5aa8..607876f3d8 100644
--- a/Documentation/MyFirstContribution.adoc
+++ b/Documentation/MyFirstContribution.adoc
@@ -833,7 +833,7 @@ This patchset is part of the MyFirstContribution tutorial and should not
 be merged.
 ----
 
-At this point the tutorial diverges, in order to demonstrate two
+At this point the tutorial diverges, in order to demonstrate three
 different methods of formatting your patchset and getting it reviewed.
 
 The first method to be covered is GitGitGadget, which is useful for those
@@ -845,9 +845,14 @@ more fine-grained control over the emails to be sent. This method requires some
 setup which can change depending on your system and will not be covered in this
 tutorial.
 
+The third method to be covered is `b4`, which builds on top of `git
+format-patch` and `git send-email`. This method is the recommended way to
+submit patches via mail as it automates a lot of the bookkeeping required by
+`git send-email`.
+
 Regardless of which method you choose, your engagement with reviewers will be
-the same; the review process will be covered after the sections on GitGitGadget
-and `git send-email`.
+the same; the review process will be covered after the sections on GitGitGadget,
+`git send-email` and `b4`.
 
 [[howto-ggg]]
 == Sending Patches via GitGitGadget
@@ -1296,6 +1301,87 @@ index 88f126184c..38da593a60 100644
 2.21.0.392.gf8f6787159e-goog
 ----
 
+[[howto-b4]]
+== Sending Patches with `b4`
+
+`b4` is a tool that builds on top of `git format-patch` and `git send-email`.
+It automates much of the bookkeeping involved in sending a patch series to a
+mailing-list-based project.
+
+Refer to the https://b4.docs.kernel.org/[b4 documentation] for a full reference.
+
+[[prep-b4]]
+=== Preparing a Patch Series
+
+`b4` tracks your patch series as a branch. To start tracking the `psuh` branch
+you have been working on, run:
+
+----
+$ b4 prep --enroll master
+----
+
+This enrolls the current branch, using `master` as the base of the topic. `b4`
+manages the cover letter as part of the branch, so you can edit it at any time
+with:
+
+----
+$ b4 prep --edit-cover
+----
+
+The cover letter not only tracks the content of the top-level mail, but also
+the set of recipients. You can add recipients by adding `To:` and `Cc:`
+trailer lines.
+
+[[send-b4]]
+=== Sending the Patches
+
+Before sending the series out for real, you can inspect what `b4` would send by
+passing `--dry-run`:
+
+----
+$ b4 send --dry-run
+----
+
+Once you are happy with the result, send the series with:
+
+----
+$ b4 send
+----
+
+[[v2-b4]]
+=== Sending v2
+
+When you are ready to send a new iteration of your series, refine your
+patches as usual using linkgit:git-rebase[1]. Note that you typically want to
+rebase on top of the cover letter. You can configure an alias to enable easy
+rebases going forward:
+
+----
+$ git config set alias.b4-rebase 'rebase "HEAD^{/--- b4-submit-tracking ---}"'
+$ git b4-rebase -i
+----
+
+Before sending out the new version you should also update the cover letter with
+`b4 prep --edit-cover` to note the relevant changes compared to the previous
+version. You can inspect the changes between the two versions with `b4 prep
+--compare-to=v1`.
+
+Same as with the first version, you can use `b4 send` to send out the second
+version. `b4` automatically bumps the version to `v2`, generates the range-diff
+against the previous iteration, and threads the new series as a reply to the
+cover letter of the first version.
+
+[[configure-b4]]
+=== Configure b4
+
+`b4` can be configured via linkgit:git-config[1]. In addition to that, projects
+can have their own set of defaults in `.b4-config` in the root tree, which also
+uses Git's config format. The user's configuration always takes precedence over
+the per-project defaults.
+
+Refer to the https://b4.docs.kernel.org/en/latest/config.html[b4 config documentation]
+for more information on the available options.
+
 [[now-what]]
 == My Patch Got Emailed - Now What?
 
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index d570184ec8..99427e1ee1 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -573,8 +573,10 @@ your existing e-mail client (often optimized for "multipart/*" MIME
 type e-mails) might render your patches unusable.
 
 NOTE: Here we outline the procedure using `format-patch` and
-`send-email`, but you can instead use GitGitGadget to send in your
-patches (see link:MyFirstContribution.html[MyFirstContribution]).
+`send-email`, but you can instead use GitGitGadget or `b4` to send in
+your patches (see link:MyFirstContribution.html[MyFirstContribution]).
+Contributors are encouraged to use `b4`, which automates much of the
+bookkeeping that is otherwise done by hand.
 
 People on the Git mailing list need to be able to read and
 comment on the changes you are submitting.  It is important for

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v4 3/3] b4: introduce configuration for the Git project
From: Patrick Steinhardt @ 2026-06-15 12:59 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
	SZEDER Gábor, Kristoffer Haugsbakk, Toon Claes,
	Karthik Nayak
In-Reply-To: <20260615-pks-b4-v4-0-22cfca8f19c5@pks.im>

In the preceding commit we have extended our documentation to recommend
b4 for sending patch series to the mailing list. Introduce configuration
so that it knows to honor preferences of the Git project by default. For
now, this configuration does two things:

  - It configures "send-same-thread = shallow", which tells b4 to always
    send subsequent versions of the same patch series as a reply to the
    cover letter of the first version.

  - It configures "prep-cover-template", which tells b4 to use a custom
    template for the cover letter. The most important change compared to
    the default template is that our custom template also includes a
    range-diff.

There are potentially more things that we may want to configure going
forward, like for example auto-configuration of folks to Cc on certain
patches. But these two tweaks feel like a good place to start.

Note that these values only serve as defaults, and users may want to
tweak those defaults based on their own preference. Luckily, users can
do that without having to touch `.b4-config` at all, as b4 allows them
to override values via Git configuration:

    ```
    $ git config set b4.prep-cover-template /does/not/exist
    $ b4 send --dry-run
    ERROR: prep-cover-template says to use /does/not/exist, but it does not exist
    ```

So this gives users an easy way to override our defaults without having
to touch ".b4-config", which would dirty the tree.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 .b4-config         |  6 ++++++
 .b4-cover-template | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/.b4-config b/.b4-config
new file mode 100644
index 0000000000..fd4fb56b6d
--- /dev/null
+++ b/.b4-config
@@ -0,0 +1,6 @@
+# Note that these are default values that you can tweak via the typical
+# git-config(1) machinery. You thus shouldn't ever have to change this file.
+# See also https://b4.docs.kernel.org/en/latest/config.html.
+[b4]
+send-same-thread = shallow
+prep-cover-template = ./.b4-cover-template
diff --git a/.b4-cover-template b/.b4-cover-template
new file mode 100644
index 0000000000..ab864933b5
--- /dev/null
+++ b/.b4-cover-template
@@ -0,0 +1,11 @@
+${cover}
+
+---
+${shortlog}
+
+${diffstat}
+
+${range_diff}
+---
+base-commit: ${base_commit}
+${prerequisites}

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty

^ permalink raw reply related

* Re: [RFC PATCH 1/2] doc: encourage review replies before rerolling
From: Patrick Steinhardt @ 2026-06-15 13:17 UTC (permalink / raw)
  To: Weijie Yuan; +Cc: git, gitster
In-Reply-To: <68a1969c35cbc2d24af7a0d09c376ecf403c3591.1781358364.git.wy@wyuan.org>

On Sat, Jun 13, 2026 at 10:08:30PM +0800, Weijie Yuan wrote:
> Review feedback should not be answered only by sending a new patch
> version. Encourage contributors to discuss their planned response in the
> mailing-list thread before rerolling.
> 
> This makes the author's reasoning explicit before the next version is
> prepared, instead of forcing reviewers to infer it from the rerolled
> patches.

Not only that, but it also encourages more social interactions between
contributors.

> diff --git a/Documentation/MyFirstContribution.adoc b/Documentation/MyFirstContribution.adoc
> index 0e2a9313ce..59891e3c14 100644
> --- a/Documentation/MyFirstContribution.adoc
> +++ b/Documentation/MyFirstContribution.adoc
> @@ -1423,11 +1423,13 @@ fewer mistakes were the only one they would need to review.
>  After a few days, you will hopefully receive a reply to your patchset with some
>  comments. Woohoo! Now you can get back to work.
>  
> -It's good manners to reply to each comment, notifying the reviewer that you have
> -made the change suggested, feel the original is better, or that the comment
> -inspired you to do something a new way which is superior to both the original
> -and the suggested change. This way reviewers don't need to inspect your v2 to
> -figure out whether you implemented their comment or not.
> +It's good manners to reply to each comment in the mailing list discussion
> +instead of letting the next version of your patch be your only response. Tell
> +the reviewer whether you plan to make the suggested change, keep the original,
> +or pursue a different approach. This way reviewers can respond to your reasoning
> +before you spend time preparing a version they may not agree with, and later do
> +not need to inspect your v2 to figure out whether you implemented their comment
> +or not.
>  
>  Reviewers may ask you about what you wrote in the patchset, either in
>  the proposed commit log message or in the changes themselves.  You

I feel like the new version doesn't really add anything significant to
this paragraph that it didn't already say before your patch, but it does
so with more words.

I'm of course biased though, so maybe more words help newcomers?

> diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
> index 6b83b6c89e..d8ad7fb73e 100644
> --- a/Documentation/SubmittingPatches
> +++ b/Documentation/SubmittingPatches
> @@ -48,8 +48,12 @@ area.
>  
>  . You get comments and suggestions for improvements.  You may even get
>    them in an "on top of your change" patch form.  You are expected to
> -  respond to them with "Reply-All" on the mailing list, while taking
> -  them into account while preparing an updated set of patches.
> +  respond to them with "Reply-All" on the mailing list, instead of
> +  letting an updated patch series be your only response.  Tell
> +  reviewers which suggestions you plan to use, which ones you disagree
> +  with, and when a comment leads you to consider a different approach.
> +  Use these replies and any follow-up discussion as input when
> +  preparing an updated set of patches.

This change I agree with though, as it highlights what kind of
discussions we expect to happen.

> @@ -639,7 +643,9 @@ grouped into their own e-mail thread to help readers find all parts of the
>  series.  To that end, send them as replies to either an additional "cover
>  letter" message (see below), the first patch, or the respective preceding patch.
>  Here is a link:MyFirstContribution.html#v2-git-send-email[step-by-step guide] on
> -how to submit updated versions of a patch series.
> +how to submit updated versions of a patch series.  Before sending another
> +version, make sure you have answered meaningful review comments in the existing
> +discussion.

This change is probably good, as well.

Overall it's a bit on the annoying side that we have to always make sure
to update both SubmittingPatches and MyFirstContribution in tandem.
Makes me wonder whether they are mostly redundant and whether it would
make sense to eventually merge them. But that's a tangent and not
anything that needs to be addressed in this (or any other) patch series.

Patrick

^ permalink raw reply

* Re: [RFC PATCH 2/2] doc: advise batching patch rerolls
From: Patrick Steinhardt @ 2026-06-15 13:17 UTC (permalink / raw)
  To: Weijie Yuan; +Cc: Junio C Hamano, git
In-Reply-To: <ai2NwMS-i_UTWR5T@wyuan.org>

On Sun, Jun 14, 2026 at 01:05:04AM +0800, Weijie Yuan wrote:
> On Sat, Jun 13, 2026 at 09:02:39AM -0700, Junio C Hamano wrote:
> > Weijie Yuan <wy@wyuan.org> writes:
> > 
> > > Contributors often need guidance on how quickly to send later iterations
> > > of a patch series. Add a rough default of no more than one new version
> > > of the same series per day so feedback can be batched and reviewers have
> > > time to comment.
> > >
> > > Mention factors that can affect the timing, such as series size, review
> > > depth, substantial rework, and how close the topic is to being accepted.
> > 
> > Another good thing to discourage yourself from rerolling too quickly
> > is that such a practice forces you to think twice and be very
> > careful before sending patches out.  As you have only one chance to
> > get it right before, say, 24 hours, you'd want to make sure that you
> > > would not distract your reviewers with stupid typos, off-by-one
> > errors, and such, and concentrate their reviews more on what matters
> > more, i.e., the higher level design, choice of algorithms, etc.
> > 
> > > +This consideration applies not only when going from the initial patch to v2, but
> > > +also to later iterations of the same series. There is no fixed rule for how long
> > > +to wait before sending a new version. A useful default is to send at most one
> > > +new version of the same patch series per day. This gives multiple reviewers time
> > > +to comment, lets you batch feedback together, and gives you time to think
> > > +through the comments you received.
> > 
> > And the 24-hour gives equal chance to comment on your patches to
> > anybody no matter where they live ;-)
> 
> Thanks for your comments above! Let me think about how to integrate
> these contents with the patch.
> 
> > I see you CC'ed Patrick, and I am sure he'll give us more useful
> > suggestions than I do here ;-)
> 
> This is his practical advice, and I just stole Patrick's wording, to be
> fair ;-) so of course I should CC him and let him know I am a wording
> thief :-P, hope it wouldn't disturb him ;-) 

Indeed, so I don't really have anything else to add here.

By the way, talking about mailing list etiquette: in scenarios like this
it makes sense to add a Helped-by trailer. That would've served as a
hint to Junio that I was already involved, and it gives credit to that
other contributor. I myself don't care much about the latter part
anymore, but newer contributors might.

And no, I don't mind at all that you "stole" my wording. Quite the
contrary, I'm happy you picked up my thoughts and cared enough to put
them into a nice patch series :)

Thanks!

Patrick

^ permalink raw reply

* [PATCH v6 00/10] builtin/history: introduce "drop" subcommand
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260601-b4-pks-history-drop-v1-0-643e32340d55@pks.im>

Hi,

this small patch series introduces the new "drop" subcommand for
git-history(1). As a reader might guess, the command does exactly that:
given a commit, it will drop that commit from the commit history and
replay descendant branches on top of it.

Changes in v6:
  - Fix bad interactions of DRY_RUN with UPDATE_HEAD
  - Link to v5: https://patch.msgid.link/20260611-b4-pks-history-drop-v5-0-34d35725559c@pks.im

Changes in v5:
  - Reject UPDATE_ORIG_HEAD without UPDATE_HEAD.
  - Link to v4: https://patch.msgid.link/20260610-b4-pks-history-drop-v4-0-70d5f0ae8c25@pks.im

Changes in v4:
  - Remove the `SKIP_REF_UPDATES` flag in favor of a new `UPDATE_HEAD`
    flag, as suggested by Phillip.
  - Rename `reset_head()` to `reset_working_tree()`. This better matches
    the new scope of the function, and it helps us to catch any
    in-flight patches that would now have to set the `UPDATE_HEAD` flag.
  - Link to v3: https://patch.msgid.link/20260608-b4-pks-history-drop-v3-0-84ca8e43e937@pks.im

Changes in v3:
  - Fix commit message typos.
  - Make `update_orig_head` and `skip_ref_updates` mutually exclusive.
  - Use fancy revisions to specify the commit to drop in the example
    section.
  - Detect conflicting changes in the index/working tree in dry-run
    mode.
  - Consistently use a subshell.
  - Rename `RESET_HEAD_ORIG_HEAD` to `RESET_HEAD_UPDATE_ORIG_HEAD`.
  - Link to v2: https://patch.msgid.link/20260603-b4-pks-history-drop-v2-0-742cb5b5176d@pks.im

Changes in v2:
  - Reworked `update_worktree()` to use `reset_head()`, which required a
    bunch of changes to `reset_head()`.
  - Consistently mention the commit that cannot be dropped as part of
    error messages.
  - Adapt error message to not use backticks anymore.
  - Drop redundant "--graph" flag in a test helper.
  - Link to v1: https://patch.msgid.link/20260601-b4-pks-history-drop-v1-0-643e32340d55@pks.im

Thanks!

Patrick

---
Patrick Steinhardt (10):
      read-cache: split out function to drop unmerged entries to stage 0
      reset: drop `USE_THE_REPOSITORY_VARIABLE`
      reset: rename `reset_head()`
      reset: modernize flags passed to `reset_working_tree()`
      reset: introduce dry-run mode
      reset: introduce ability to skip updating HEAD
      reset: allow the caller to specify the current HEAD object
      reset: stop assuming that the caller passes in a clean index
      builtin/history: split handling of ref updates into two phases
      builtin/history: implement "drop" subcommand

 Documentation/git-history.adoc |  38 ++-
 builtin/history.c              | 288 +++++++++++++++++++---
 builtin/rebase.c               |  41 ++--
 read-cache-ll.h                |   1 +
 read-cache.c                   |  12 +-
 reset.c                        | 102 +++++---
 reset.h                        |  51 ++--
 sequencer.c                    |  17 +-
 t/meson.build                  |   1 +
 t/t3454-history-drop.sh        | 537 +++++++++++++++++++++++++++++++++++++++++
 10 files changed, 971 insertions(+), 117 deletions(-)

Range-diff versus v5:

 1:  e21a324987 =  1:  07dee893d7 read-cache: split out function to drop unmerged entries to stage 0
 2:  d16b3df944 =  2:  96f33165b8 reset: drop `USE_THE_REPOSITORY_VARIABLE`
 3:  b73224c4b0 =  3:  35f31792f3 reset: rename `reset_head()`
 4:  1b5fbaa9c2 =  4:  1837483676 reset: modernize flags passed to `reset_working_tree()`
 5:  a73fb4b3e8 =  5:  206b73f71e reset: introduce dry-run mode
 6:  3be3208155 !  6:  48c7b1571f reset: introduce ability to skip updating HEAD
    @@ reset.c: int reset_working_tree(struct repository *r,
      
      	if (refs_only) {
     -		if (!dry_run)
    -+		if (update_head)
    ++		if (!dry_run && update_head)
      			return update_refs(r, opts, oid, head);
      		return 0;
      	}
 7:  fb3a357d93 =  7:  d6a9a3e524 reset: allow the caller to specify the current HEAD object
 8:  9b883dbbad =  8:  d4bc3acd87 reset: stop assuming that the caller passes in a clean index
 9:  88a929e1a5 =  9:  493a3f4422 builtin/history: split handling of ref updates into two phases
10:  8f19defcb0 = 10:  184849df09 builtin/history: implement "drop" subcommand

---
base-commit: 1666c1265231b0bc5f613fbbf3f0a9896cdef76e
change-id: 20260601-b4-pks-history-drop-28f6c6399e7b


^ permalink raw reply

* [PATCH v6 01/10] read-cache: split out function to drop unmerged entries to stage 0
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

In `repo_read_index_unmerged()` we read the index and then drop any
unmerged entries to stage 0. In a subsequent commit we'll want to
perform this operation on arbitrary indexes, not only the one of the
given repository.

Prepare for this by splitting out the functionality into a new function
that can act on an arbitrary index.

While at it, fix a signedness mismatch when iterating through the index
cache entries.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 read-cache-ll.h |  1 +
 read-cache.c    | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/read-cache-ll.h b/read-cache-ll.h
index 2c8b4b21b1..71b87615eb 100644
--- a/read-cache-ll.h
+++ b/read-cache-ll.h
@@ -309,6 +309,7 @@ int write_locked_index(struct index_state *, struct lock_file *lock, unsigned fl
 void discard_index(struct index_state *);
 void move_index_extensions(struct index_state *dst, struct index_state *src);
 int unmerged_index(const struct index_state *);
+int index_state_unmerged_to_stage0(struct index_state *istate);
 
 /**
  * Returns 1 if istate differs from tree, 0 otherwise.  If tree is NULL,
diff --git a/read-cache.c b/read-cache.c
index 21829102ae..799a5bc719 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3403,13 +3403,15 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
  */
 int repo_read_index_unmerged(struct repository *repo)
 {
-	struct index_state *istate;
-	int i;
+	repo_read_index(repo);
+	return index_state_unmerged_to_stage0(repo->index);
+}
+
+int index_state_unmerged_to_stage0(struct index_state *istate)
+{
 	int unmerged = 0;
 
-	repo_read_index(repo);
-	istate = repo->index;
-	for (i = 0; i < istate->cache_nr; i++) {
+	for (unsigned int i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce = istate->cache[i];
 		struct cache_entry *new_ce;
 		int len;

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 02/10] reset: drop `USE_THE_REPOSITORY_VARIABLE`
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

In "reset.c" we still have references to `the_repository`, even though
the only entry point into the file already receives a repository as
parameter.

Update all uses of `the_repository` to instead use the passed-in repo
and drop `USE_THE_REPOSITORY_VARIABLE`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reset.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/reset.c b/reset.c
index 46e30e6394..3b3cb74dab 100644
--- a/reset.c
+++ b/reset.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
 #include "git-compat-util.h"
 #include "cache-tree.h"
 #include "gettext.h"
@@ -13,7 +11,8 @@
 #include "unpack-trees.h"
 #include "hook.h"
 
-static int update_refs(const struct reset_head_opts *opts,
+static int update_refs(struct repository *repo,
+		       const struct reset_head_opts *opts,
 		       const struct object_id *oid,
 		       const struct object_id *head)
 {
@@ -42,19 +41,19 @@ static int update_refs(const struct reset_head_opts *opts,
 	prefix_len = msg.len;
 
 	if (update_orig_head) {
-		if (!repo_get_oid(the_repository, "ORIG_HEAD", &oid_old_orig))
+		if (!repo_get_oid(repo, "ORIG_HEAD", &oid_old_orig))
 			old_orig = &oid_old_orig;
 		if (head) {
 			if (!reflog_orig_head) {
 				strbuf_addstr(&msg, "updating ORIG_HEAD");
 				reflog_orig_head = msg.buf;
 			}
-			refs_update_ref(get_main_ref_store(the_repository),
+			refs_update_ref(get_main_ref_store(repo),
 					reflog_orig_head, "ORIG_HEAD",
 					orig_head ? orig_head : head,
 					old_orig, 0, UPDATE_REFS_MSG_ON_ERR);
 		} else if (old_orig)
-			refs_delete_ref(get_main_ref_store(the_repository),
+			refs_delete_ref(get_main_ref_store(repo),
 					NULL, "ORIG_HEAD", old_orig, 0);
 	}
 
@@ -64,23 +63,23 @@ static int update_refs(const struct reset_head_opts *opts,
 		reflog_head = msg.buf;
 	}
 	if (!switch_to_branch)
-		ret = refs_update_ref(get_main_ref_store(the_repository),
+		ret = refs_update_ref(get_main_ref_store(repo),
 				      reflog_head, "HEAD", oid, head,
 				      detach_head ? REF_NO_DEREF : 0,
 				      UPDATE_REFS_MSG_ON_ERR);
 	else {
-		ret = refs_update_ref(get_main_ref_store(the_repository),
+		ret = refs_update_ref(get_main_ref_store(repo),
 				      reflog_branch ? reflog_branch : reflog_head,
 				      switch_to_branch, oid, NULL, 0,
 				      UPDATE_REFS_MSG_ON_ERR);
 		if (!ret)
-			ret = refs_update_symref(get_main_ref_store(the_repository),
+			ret = refs_update_symref(get_main_ref_store(repo),
 						 "HEAD", switch_to_branch,
 						 reflog_head);
 	}
 	if (!ret && run_hook)
-		run_hooks_l(the_repository, "post-checkout",
-			    oid_to_hex(head ? head : null_oid(the_hash_algo)),
+		run_hooks_l(repo, "post-checkout",
+			    oid_to_hex(head ? head : null_oid(repo->hash_algo)),
 			    oid_to_hex(oid), "1", NULL);
 	strbuf_release(&msg);
 	return ret;
@@ -126,7 +125,7 @@ int reset_head(struct repository *r, const struct reset_head_opts *opts)
 		oid = &head_oid;
 
 	if (refs_only)
-		return update_refs(opts, oid, head);
+		return update_refs(r, opts, oid, head);
 
 	action = reset_hard ? "reset" : "checkout";
 	setup_unpack_trees_porcelain(&unpack_tree_opts, action);
@@ -163,7 +162,7 @@ int reset_head(struct repository *r, const struct reset_head_opts *opts)
 		goto leave_reset_head;
 	}
 
-	tree = repo_parse_tree_indirect(the_repository, oid);
+	tree = repo_parse_tree_indirect(r, oid);
 	if (!tree) {
 		ret = error(_("unable to read tree (%s)"), oid_to_hex(oid));
 		goto leave_reset_head;
@@ -177,7 +176,7 @@ int reset_head(struct repository *r, const struct reset_head_opts *opts)
 	}
 
 	if (oid != &head_oid || update_orig_head || switch_to_branch)
-		ret = update_refs(opts, oid, head);
+		ret = update_refs(r, opts, oid, head);
 
 leave_reset_head:
 	rollback_lock_file(&lock);

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 03/10] reset: rename `reset_head()`
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

In a subsequent commit we're about to adapt `reset_head()` so that the
reference update to HEAD is optional, only. At this point the function
starts to feel misnamed, as it doesn't necessarily have anything to do
with the HEAD reference anymore. The gist of the function then is that
we reset the working tree to a specific new commit, updating both the
index and the checked-out files.

Rename it to `reset_working_tree()` to better reflect that.

Note that we don't adjust the flags yet. This will happen in a
subsequent commit.

Suggested-by: Phillip Wood <phillip.wood123@gmail.com>
---
 builtin/rebase.c | 20 ++++++++++----------
 reset.c          |  5 +++--
 reset.h          |  4 ++--
 sequencer.c      |  8 ++++----
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index fa4f5d9306..22fbba3c62 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -592,7 +592,7 @@ static int finish_rebase(struct rebase_options *opts)
 static int move_to_original_branch(struct rebase_options *opts)
 {
 	struct strbuf branch_reflog = STRBUF_INIT, head_reflog = STRBUF_INIT;
-	struct reset_head_opts ropts = { 0 };
+	struct reset_working_tree_options ropts = { 0 };
 	int ret;
 
 	if (!opts->head_name)
@@ -610,7 +610,7 @@ static int move_to_original_branch(struct rebase_options *opts)
 	ropts.flags = RESET_HEAD_REFS_ONLY;
 	ropts.branch_msg = branch_reflog.buf;
 	ropts.head_msg = head_reflog.buf;
-	ret = reset_head(the_repository, &ropts);
+	ret = reset_working_tree(the_repository, &ropts);
 
 	strbuf_release(&branch_reflog);
 	strbuf_release(&head_reflog);
@@ -685,7 +685,7 @@ static int run_am(struct rebase_options *opts)
 
 	status = run_command(&format_patch);
 	if (status) {
-		struct reset_head_opts ropts = { 0 };
+		struct reset_working_tree_options ropts = { 0 };
 		unlink(rebased_patches);
 		free(rebased_patches);
 		child_process_clear(&am);
@@ -693,7 +693,7 @@ static int run_am(struct rebase_options *opts)
 		ropts.oid = &opts->orig_head->object.oid;
 		ropts.branch = opts->head_name;
 		ropts.default_reflog_action = opts->reflog_action;
-		reset_head(the_repository, &ropts);
+		reset_working_tree(the_repository, &ropts);
 		error(_("\ngit encountered an error while preparing the "
 			"patches to replay\n"
 			"these revisions:\n"
@@ -855,7 +855,7 @@ static int rebase_config(const char *var, const char *value,
 static int checkout_up_to_date(struct rebase_options *options)
 {
 	struct strbuf buf = STRBUF_INIT;
-	struct reset_head_opts ropts = { 0 };
+	struct reset_working_tree_options ropts = { 0 };
 	int ret = 0;
 
 	strbuf_addf(&buf, "%s: checkout %s",
@@ -866,7 +866,7 @@ static int checkout_up_to_date(struct rebase_options *options)
 	if (!ropts.branch)
 		ropts.flags |=  RESET_HEAD_DETACH;
 	ropts.head_msg = buf.buf;
-	if (reset_head(the_repository, &ropts) < 0)
+	if (reset_working_tree(the_repository, &ropts) < 0)
 		ret = error(_("could not switch to %s"), options->switch_to);
 	strbuf_release(&buf);
 
@@ -1116,7 +1116,7 @@ int cmd_rebase(int argc,
 	int reschedule_failed_exec = -1;
 	int allow_preemptive_ff = 1;
 	int preserve_merges_selected = 0;
-	struct reset_head_opts ropts = { 0 };
+	struct reset_working_tree_options ropts = { 0 };
 	struct option builtin_rebase_options[] = {
 		OPT_STRING(0, "onto", &options.onto_name,
 			   N_("revision"),
@@ -1385,7 +1385,7 @@ int cmd_rebase(int argc,
 		rerere_clear(the_repository, &merge_rr);
 		string_list_clear(&merge_rr, 1);
 		ropts.flags = RESET_HEAD_HARD;
-		if (reset_head(the_repository, &ropts) < 0)
+		if (reset_working_tree(the_repository, &ropts) < 0)
 			die(_("could not discard worktree changes"));
 		remove_branch_state(the_repository, 0);
 		if (read_basic_state(&options))
@@ -1410,7 +1410,7 @@ int cmd_rebase(int argc,
 		ropts.head_msg = head_msg.buf;
 		ropts.branch = options.head_name;
 		ropts.flags = RESET_HEAD_HARD;
-		if (reset_head(the_repository, &ropts) < 0)
+		if (reset_working_tree(the_repository, &ropts) < 0)
 			die(_("could not move back to %s"),
 			    oid_to_hex(&options.orig_head->object.oid));
 		strbuf_release(&head_msg);
@@ -1880,7 +1880,7 @@ int cmd_rebase(int argc,
 			RESET_HEAD_RUN_POST_CHECKOUT_HOOK;
 	ropts.head_msg = msg.buf;
 	ropts.default_reflog_action = options.reflog_action;
-	if (reset_head(the_repository, &ropts)) {
+	if (reset_working_tree(the_repository, &ropts)) {
 		ret = error(_("Could not detach HEAD"));
 		goto cleanup_autostash;
 	}
diff --git a/reset.c b/reset.c
index 3b3cb74dab..799596398b 100644
--- a/reset.c
+++ b/reset.c
@@ -12,7 +12,7 @@
 #include "hook.h"
 
 static int update_refs(struct repository *repo,
-		       const struct reset_head_opts *opts,
+		       const struct reset_working_tree_options *opts,
 		       const struct object_id *oid,
 		       const struct object_id *head)
 {
@@ -85,7 +85,8 @@ static int update_refs(struct repository *repo,
 	return ret;
 }
 
-int reset_head(struct repository *r, const struct reset_head_opts *opts)
+int reset_working_tree(struct repository *r,
+		       const struct reset_working_tree_options *opts)
 {
 	const struct object_id *oid = opts->oid;
 	const char *switch_to_branch = opts->branch;
diff --git a/reset.h b/reset.h
index a28f81829d..f130152014 100644
--- a/reset.h
+++ b/reset.h
@@ -17,7 +17,7 @@
 /* Update ORIG_HEAD as well as HEAD */
 #define RESET_ORIG_HEAD (1<<4)
 
-struct reset_head_opts {
+struct reset_working_tree_options {
 	/*
 	 * The commit to checkout/reset to. Defaults to HEAD.
 	 */
@@ -55,6 +55,6 @@ struct reset_head_opts {
 	const char *default_reflog_action;
 };
 
-int reset_head(struct repository *r, const struct reset_head_opts *opts);
+int reset_working_tree(struct repository *r, const struct reset_working_tree_options *opts);
 
 #endif
diff --git a/sequencer.c b/sequencer.c
index 1ee4b2875b..d73ecf0384 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -4677,7 +4677,7 @@ static void create_autostash_internal(struct repository *r,
 	if (has_unstaged_changes(r, 1) ||
 	    has_uncommitted_changes(r, 1)) {
 		struct child_process stash = CHILD_PROCESS_INIT;
-		struct reset_head_opts ropts = { .flags = RESET_HEAD_HARD };
+		struct reset_working_tree_options ropts = { .flags = RESET_HEAD_HARD };
 		struct object_id oid;
 
 		strvec_pushl(&stash.args,
@@ -4707,7 +4707,7 @@ static void create_autostash_internal(struct repository *r,
 
 		if (!silent)
 			printf(_("Created autostash: %s\n"), buf.buf);
-		if (reset_head(r, &ropts) < 0)
+		if (reset_working_tree(r, &ropts) < 0)
 			die(_("could not reset --hard"));
 		discard_index(r->index);
 		if (repo_read_index(r) < 0)
@@ -4867,7 +4867,7 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts,
 			 const char *onto_name, const struct object_id *onto,
 			 const struct object_id *orig_head)
 {
-	struct reset_head_opts ropts = {
+	struct reset_working_tree_options ropts = {
 		.oid = onto,
 		.orig_head = orig_head,
 		.flags = RESET_HEAD_DETACH | RESET_ORIG_HEAD |
@@ -4876,7 +4876,7 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts,
 					   onto_name),
 		.default_reflog_action = sequencer_reflog_action(opts)
 	};
-	if (reset_head(r, &ropts)) {
+	if (reset_working_tree(r, &ropts)) {
 		apply_autostash(rebase_path_autostash());
 		sequencer_remove_state(opts);
 		return error(_("could not detach HEAD"));

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 04/10] reset: modernize flags passed to `reset_working_tree()`
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

The flags passed to `reset_working_tree()` are declared as defines. This
has fallen a bit out of practice nowadays, where we instead prefer to
use enums. Furthermore, the prefix of those flags does not match the
function name anymore after the rename in the preceding commit.

Adapt the code to follow modern best practices and adapt the flag names.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/rebase.c | 15 ++++++++-------
 reset.c          | 12 ++++++------
 reset.h          | 31 +++++++++++++++++++------------
 sequencer.c      |  9 ++++++---
 4 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index 22fbba3c62..06dcbaf5e8 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -607,7 +607,7 @@ static int move_to_original_branch(struct rebase_options *opts)
 	strbuf_addf(&head_reflog, "%s (finish): returning to %s",
 		    opts->reflog_action, opts->head_name);
 	ropts.branch = opts->head_name;
-	ropts.flags = RESET_HEAD_REFS_ONLY;
+	ropts.flags = RESET_WORKING_TREE_REFS_ONLY;
 	ropts.branch_msg = branch_reflog.buf;
 	ropts.head_msg = head_reflog.buf;
 	ret = reset_working_tree(the_repository, &ropts);
@@ -862,9 +862,9 @@ static int checkout_up_to_date(struct rebase_options *options)
 		    options->reflog_action, options->switch_to);
 	ropts.oid = &options->orig_head->object.oid;
 	ropts.branch = options->head_name;
-	ropts.flags = RESET_HEAD_RUN_POST_CHECKOUT_HOOK;
+	ropts.flags = RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK;
 	if (!ropts.branch)
-		ropts.flags |=  RESET_HEAD_DETACH;
+		ropts.flags |=  RESET_WORKING_TREE_DETACH;
 	ropts.head_msg = buf.buf;
 	if (reset_working_tree(the_repository, &ropts) < 0)
 		ret = error(_("could not switch to %s"), options->switch_to);
@@ -1384,7 +1384,7 @@ int cmd_rebase(int argc,
 
 		rerere_clear(the_repository, &merge_rr);
 		string_list_clear(&merge_rr, 1);
-		ropts.flags = RESET_HEAD_HARD;
+		ropts.flags = RESET_WORKING_TREE_HARD;
 		if (reset_working_tree(the_repository, &ropts) < 0)
 			die(_("could not discard worktree changes"));
 		remove_branch_state(the_repository, 0);
@@ -1409,7 +1409,7 @@ int cmd_rebase(int argc,
 		ropts.oid = &options.orig_head->object.oid;
 		ropts.head_msg = head_msg.buf;
 		ropts.branch = options.head_name;
-		ropts.flags = RESET_HEAD_HARD;
+		ropts.flags = RESET_WORKING_TREE_HARD;
 		if (reset_working_tree(the_repository, &ropts) < 0)
 			die(_("could not move back to %s"),
 			    oid_to_hex(&options.orig_head->object.oid));
@@ -1876,8 +1876,9 @@ int cmd_rebase(int argc,
 		    options.reflog_action, options.onto_name);
 	ropts.oid = &options.onto->object.oid;
 	ropts.orig_head = &options.orig_head->object.oid;
-	ropts.flags = RESET_HEAD_DETACH | RESET_ORIG_HEAD |
-			RESET_HEAD_RUN_POST_CHECKOUT_HOOK;
+	ropts.flags = RESET_WORKING_TREE_DETACH |
+		      RESET_WORKING_TREE_UPDATE_ORIG_HEAD |
+		      RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK;
 	ropts.head_msg = msg.buf;
 	ropts.default_reflog_action = options.reflog_action;
 	if (reset_working_tree(the_repository, &ropts)) {
diff --git a/reset.c b/reset.c
index 799596398b..4ca7f23a25 100644
--- a/reset.c
+++ b/reset.c
@@ -16,9 +16,9 @@ static int update_refs(struct repository *repo,
 		       const struct object_id *oid,
 		       const struct object_id *head)
 {
-	unsigned detach_head = opts->flags & RESET_HEAD_DETACH;
-	unsigned run_hook = opts->flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK;
-	unsigned update_orig_head = opts->flags & RESET_ORIG_HEAD;
+	unsigned detach_head = opts->flags & RESET_WORKING_TREE_DETACH;
+	unsigned run_hook = opts->flags & RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK;
+	unsigned update_orig_head = opts->flags & RESET_WORKING_TREE_UPDATE_ORIG_HEAD;
 	const struct object_id *orig_head = opts->orig_head;
 	const char *switch_to_branch = opts->branch;
 	const char *reflog_branch = opts->branch_msg;
@@ -90,9 +90,9 @@ int reset_working_tree(struct repository *r,
 {
 	const struct object_id *oid = opts->oid;
 	const char *switch_to_branch = opts->branch;
-	unsigned reset_hard = opts->flags & RESET_HEAD_HARD;
-	unsigned refs_only = opts->flags & RESET_HEAD_REFS_ONLY;
-	unsigned update_orig_head = opts->flags & RESET_ORIG_HEAD;
+	unsigned reset_hard = opts->flags & RESET_WORKING_TREE_HARD;
+	unsigned refs_only = opts->flags & RESET_WORKING_TREE_REFS_ONLY;
+	unsigned update_orig_head = opts->flags & RESET_WORKING_TREE_UPDATE_ORIG_HEAD;
 	struct object_id *head = NULL, head_oid;
 	struct tree_desc desc[2] = { { NULL }, { NULL } };
 	struct lock_file lock = LOCK_INIT;
diff --git a/reset.h b/reset.h
index f130152014..2e5826de99 100644
--- a/reset.h
+++ b/reset.h
@@ -6,16 +6,22 @@
 
 #define GIT_REFLOG_ACTION_ENVIRONMENT "GIT_REFLOG_ACTION"
 
-/* Request a detached checkout */
-#define RESET_HEAD_DETACH (1<<0)
-/* Request a reset rather than a checkout */
-#define RESET_HEAD_HARD (1<<1)
-/* Run the post-checkout hook */
-#define RESET_HEAD_RUN_POST_CHECKOUT_HOOK (1<<2)
-/* Only update refs, do not touch the worktree */
-#define RESET_HEAD_REFS_ONLY (1<<3)
-/* Update ORIG_HEAD as well as HEAD */
-#define RESET_ORIG_HEAD (1<<4)
+enum reset_working_tree_flags {
+	/* Request a detached checkout */
+	RESET_WORKING_TREE_DETACH = (1 << 0),
+
+	/* Request a reset rather than a checkout */
+	RESET_WORKING_TREE_HARD = (1 << 1),
+
+	/* Run the post-checkout hook */
+	RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK = (1 << 2),
+
+	/* Only update refs, do not touch the worktree */
+	RESET_WORKING_TREE_REFS_ONLY = (1 << 3),
+
+	/* Update ORIG_HEAD as well as HEAD */
+	RESET_WORKING_TREE_UPDATE_ORIG_HEAD = (1 << 4),
+};
 
 struct reset_working_tree_options {
 	/*
@@ -33,7 +39,7 @@ struct reset_working_tree_options {
 	/*
 	 * Flags defined above.
 	 */
-	unsigned flags;
+	enum reset_working_tree_flags flags;
 	/*
 	 * Optional reflog message for branch, defaults to head_msg.
 	 */
@@ -45,7 +51,8 @@ struct reset_working_tree_options {
 	const char *head_msg;
 	/*
 	 * Optional reflog message for ORIG_HEAD, if this omitted and flags
-	 * contains RESET_ORIG_HEAD then default_reflog_action must be given.
+	 * contains RESET_WORKING_TREE_UPDATE_ORIG_HEAD then
+	 * default_reflog_action must be given.
 	 */
 	const char *orig_head_msg;
 	/*
diff --git a/sequencer.c b/sequencer.c
index d73ecf0384..4efe831178 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -4677,7 +4677,9 @@ static void create_autostash_internal(struct repository *r,
 	if (has_unstaged_changes(r, 1) ||
 	    has_uncommitted_changes(r, 1)) {
 		struct child_process stash = CHILD_PROCESS_INIT;
-		struct reset_working_tree_options ropts = { .flags = RESET_HEAD_HARD };
+		struct reset_working_tree_options ropts = {
+			.flags = RESET_WORKING_TREE_HARD,
+		};
 		struct object_id oid;
 
 		strvec_pushl(&stash.args,
@@ -4870,8 +4872,9 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts,
 	struct reset_working_tree_options ropts = {
 		.oid = onto,
 		.orig_head = orig_head,
-		.flags = RESET_HEAD_DETACH | RESET_ORIG_HEAD |
-				RESET_HEAD_RUN_POST_CHECKOUT_HOOK,
+		.flags = RESET_WORKING_TREE_DETACH |
+			 RESET_WORKING_TREE_UPDATE_ORIG_HEAD |
+			 RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK,
 		.head_msg = reflog_message(opts, "start", "checkout %s",
 					   onto_name),
 		.default_reflog_action = sequencer_reflog_action(opts)

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 05/10] reset: introduce dry-run mode
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

In a subsequent commit we'll add another caller to `reset_working_tree()`
that wants to perform a dry-run check of whether it would be possible to
update the index and working tree when moving to a new commit. Introduce
a new flag that lets the caller perform this operation.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reset.c | 44 +++++++++++++++++++++++++++++++++-----------
 reset.h |  6 ++++++
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/reset.c b/reset.c
index 4ca7f23a25..99f2c1b012 100644
--- a/reset.c
+++ b/reset.c
@@ -93,11 +93,14 @@ int reset_working_tree(struct repository *r,
 	unsigned reset_hard = opts->flags & RESET_WORKING_TREE_HARD;
 	unsigned refs_only = opts->flags & RESET_WORKING_TREE_REFS_ONLY;
 	unsigned update_orig_head = opts->flags & RESET_WORKING_TREE_UPDATE_ORIG_HEAD;
+	unsigned dry_run = opts->flags & RESET_WORKING_TREE_DRY_RUN;
 	struct object_id *head = NULL, head_oid;
 	struct tree_desc desc[2] = { { NULL }, { NULL } };
 	struct lock_file lock = LOCK_INIT;
 	struct unpack_trees_options unpack_tree_opts = { 0 };
 	struct tree *tree;
+	struct index_state scratch_index = INDEX_STATE_INIT(r);
+	struct index_state *istate;
 	const char *action;
 	int ret = 0, nr = 0;
 
@@ -110,7 +113,7 @@ int reset_working_tree(struct repository *r,
 	if (opts->branch_msg && !opts->branch)
 		BUG("branch reflog message given without a branch");
 
-	if (!refs_only && repo_hold_locked_index(r, &lock, LOCK_REPORT_ON_ERROR) < 0) {
+	if (!refs_only && !dry_run && repo_hold_locked_index(r, &lock, LOCK_REPORT_ON_ERROR) < 0) {
 		ret = -1;
 		goto leave_reset_head;
 	}
@@ -125,16 +128,36 @@ int reset_working_tree(struct repository *r,
 	if (!oid)
 		oid = &head_oid;
 
-	if (refs_only)
-		return update_refs(r, opts, oid, head);
+	if (refs_only) {
+		if (!dry_run)
+			return update_refs(r, opts, oid, head);
+		return 0;
+	}
+
+	if (dry_run) {
+		if (read_index_from(&scratch_index, r->index_file, r->gitdir) < 0 ||
+		    index_state_unmerged_to_stage0(&scratch_index) < 0) {
+			ret = error(_("could not read index"));
+			goto leave_reset_head;
+		}
+
+		istate = &scratch_index;
+	} else {
+		if (repo_read_index_unmerged(r) < 0) {
+			ret = error(_("could not read index"));
+			goto leave_reset_head;
+		}
+		istate = r->index;
+	}
 
 	action = reset_hard ? "reset" : "checkout";
 	setup_unpack_trees_porcelain(&unpack_tree_opts, action);
 	unpack_tree_opts.head_idx = 1;
-	unpack_tree_opts.src_index = r->index;
-	unpack_tree_opts.dst_index = r->index;
+	unpack_tree_opts.src_index = istate;
+	unpack_tree_opts.dst_index = istate;
 	unpack_tree_opts.fn = reset_hard ? oneway_merge : twoway_merge;
-	unpack_tree_opts.update = 1;
+	unpack_tree_opts.update = !dry_run;
+	unpack_tree_opts.dry_run = dry_run;
 	unpack_tree_opts.merge = 1;
 	unpack_tree_opts.preserve_ignored = 0; /* FIXME: !overwrite_ignore */
 	unpack_tree_opts.skip_cache_tree_update = 1;
@@ -142,11 +165,6 @@ int reset_working_tree(struct repository *r,
 	if (reset_hard)
 		unpack_tree_opts.reset = UNPACK_RESET_PROTECT_UNTRACKED;
 
-	if (repo_read_index_unmerged(r) < 0) {
-		ret = error(_("could not read index"));
-		goto leave_reset_head;
-	}
-
 	if (!reset_hard && !fill_tree_descriptor(r, &desc[nr++], &head_oid)) {
 		ret = error(_("failed to find tree of %s"),
 			    oid_to_hex(&head_oid));
@@ -163,6 +181,9 @@ int reset_working_tree(struct repository *r,
 		goto leave_reset_head;
 	}
 
+	if (dry_run)
+		goto leave_reset_head;
+
 	tree = repo_parse_tree_indirect(r, oid);
 	if (!tree) {
 		ret = error(_("unable to read tree (%s)"), oid_to_hex(oid));
@@ -182,6 +203,7 @@ int reset_working_tree(struct repository *r,
 leave_reset_head:
 	rollback_lock_file(&lock);
 	clear_unpack_trees_porcelain(&unpack_tree_opts);
+	release_index(&scratch_index);
 	while (nr)
 		free((void *)desc[--nr].buffer);
 	return ret;
diff --git a/reset.h b/reset.h
index 2e5826de99..898e4a1e95 100644
--- a/reset.h
+++ b/reset.h
@@ -21,6 +21,12 @@ enum reset_working_tree_flags {
 
 	/* Update ORIG_HEAD as well as HEAD */
 	RESET_WORKING_TREE_UPDATE_ORIG_HEAD = (1 << 4),
+
+	/*
+	 * Perform a dry-run by performing the operation without updating
+	 * any user-visible state.
+	 */
+	RESET_WORKING_TREE_DRY_RUN = (1 << 5),
 };
 
 struct reset_working_tree_options {

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 06/10] reset: introduce ability to skip updating HEAD
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

In a subsequent commit we'll introduce a new caller to
`reset_working_tree()` that really only wants to update the index and
working tree, without updating any references. Introduce a new flag that
makes the caller opt in to updating HEAD and adapt all callers to set
that flag.

Note that in a previous iteration we instead introduced a flag that made
callers opt out of updating any references. This was somewhat awkward
though because we already have the `UPDATE_ORIG_HEAD` flag, so the
result was somewhat inconsistent.

Suggested-by: Phillip Wood <phillip.wood123@gmail.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/rebase.c | 14 ++++++++++----
 reset.c          |  9 +++++++--
 reset.h          |  9 ++++++---
 sequencer.c      |  4 +++-
 4 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index 06dcbaf5e8..10a306310c 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -607,7 +607,8 @@ static int move_to_original_branch(struct rebase_options *opts)
 	strbuf_addf(&head_reflog, "%s (finish): returning to %s",
 		    opts->reflog_action, opts->head_name);
 	ropts.branch = opts->head_name;
-	ropts.flags = RESET_WORKING_TREE_REFS_ONLY;
+	ropts.flags = RESET_WORKING_TREE_REFS_ONLY |
+		      RESET_WORKING_TREE_UPDATE_HEAD;
 	ropts.branch_msg = branch_reflog.buf;
 	ropts.head_msg = head_reflog.buf;
 	ret = reset_working_tree(the_repository, &ropts);
@@ -693,6 +694,7 @@ static int run_am(struct rebase_options *opts)
 		ropts.oid = &opts->orig_head->object.oid;
 		ropts.branch = opts->head_name;
 		ropts.default_reflog_action = opts->reflog_action;
+		ropts.flags = RESET_WORKING_TREE_UPDATE_HEAD;
 		reset_working_tree(the_repository, &ropts);
 		error(_("\ngit encountered an error while preparing the "
 			"patches to replay\n"
@@ -862,7 +864,8 @@ static int checkout_up_to_date(struct rebase_options *options)
 		    options->reflog_action, options->switch_to);
 	ropts.oid = &options->orig_head->object.oid;
 	ropts.branch = options->head_name;
-	ropts.flags = RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK;
+	ropts.flags = RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK |
+		      RESET_WORKING_TREE_UPDATE_HEAD;
 	if (!ropts.branch)
 		ropts.flags |=  RESET_WORKING_TREE_DETACH;
 	ropts.head_msg = buf.buf;
@@ -1384,7 +1387,8 @@ int cmd_rebase(int argc,
 
 		rerere_clear(the_repository, &merge_rr);
 		string_list_clear(&merge_rr, 1);
-		ropts.flags = RESET_WORKING_TREE_HARD;
+		ropts.flags = RESET_WORKING_TREE_HARD |
+			      RESET_WORKING_TREE_UPDATE_HEAD;
 		if (reset_working_tree(the_repository, &ropts) < 0)
 			die(_("could not discard worktree changes"));
 		remove_branch_state(the_repository, 0);
@@ -1409,7 +1413,8 @@ int cmd_rebase(int argc,
 		ropts.oid = &options.orig_head->object.oid;
 		ropts.head_msg = head_msg.buf;
 		ropts.branch = options.head_name;
-		ropts.flags = RESET_WORKING_TREE_HARD;
+		ropts.flags = RESET_WORKING_TREE_HARD |
+			      RESET_WORKING_TREE_UPDATE_HEAD;
 		if (reset_working_tree(the_repository, &ropts) < 0)
 			die(_("could not move back to %s"),
 			    oid_to_hex(&options.orig_head->object.oid));
@@ -1877,6 +1882,7 @@ int cmd_rebase(int argc,
 	ropts.oid = &options.onto->object.oid;
 	ropts.orig_head = &options.orig_head->object.oid;
 	ropts.flags = RESET_WORKING_TREE_DETACH |
+		      RESET_WORKING_TREE_UPDATE_HEAD |
 		      RESET_WORKING_TREE_UPDATE_ORIG_HEAD |
 		      RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK;
 	ropts.head_msg = msg.buf;
diff --git a/reset.c b/reset.c
index 99f2c1b012..4bde5d8dc6 100644
--- a/reset.c
+++ b/reset.c
@@ -92,6 +92,7 @@ int reset_working_tree(struct repository *r,
 	const char *switch_to_branch = opts->branch;
 	unsigned reset_hard = opts->flags & RESET_WORKING_TREE_HARD;
 	unsigned refs_only = opts->flags & RESET_WORKING_TREE_REFS_ONLY;
+	unsigned update_head = opts->flags & RESET_WORKING_TREE_UPDATE_HEAD;
 	unsigned update_orig_head = opts->flags & RESET_WORKING_TREE_UPDATE_ORIG_HEAD;
 	unsigned dry_run = opts->flags & RESET_WORKING_TREE_DRY_RUN;
 	struct object_id *head = NULL, head_oid;
@@ -113,6 +114,9 @@ int reset_working_tree(struct repository *r,
 	if (opts->branch_msg && !opts->branch)
 		BUG("branch reflog message given without a branch");
 
+	if (update_orig_head && !update_head)
+		BUG("cannot update ORIG_HEAD without updating HEAD" );
+
 	if (!refs_only && !dry_run && repo_hold_locked_index(r, &lock, LOCK_REPORT_ON_ERROR) < 0) {
 		ret = -1;
 		goto leave_reset_head;
@@ -129,7 +133,7 @@ int reset_working_tree(struct repository *r,
 		oid = &head_oid;
 
 	if (refs_only) {
-		if (!dry_run)
+		if (!dry_run && update_head)
 			return update_refs(r, opts, oid, head);
 		return 0;
 	}
@@ -197,7 +201,8 @@ int reset_working_tree(struct repository *r,
 		goto leave_reset_head;
 	}
 
-	if (oid != &head_oid || update_orig_head || switch_to_branch)
+	if (update_head &&
+	    (oid != &head_oid || update_orig_head || switch_to_branch))
 		ret = update_refs(r, opts, oid, head);
 
 leave_reset_head:
diff --git a/reset.h b/reset.h
index 898e4a1e95..38b2891b53 100644
--- a/reset.h
+++ b/reset.h
@@ -19,14 +19,17 @@ enum reset_working_tree_flags {
 	/* Only update refs, do not touch the worktree */
 	RESET_WORKING_TREE_REFS_ONLY = (1 << 3),
 
-	/* Update ORIG_HEAD as well as HEAD */
-	RESET_WORKING_TREE_UPDATE_ORIG_HEAD = (1 << 4),
+	/* Update HEAD */
+	RESET_WORKING_TREE_UPDATE_HEAD = (1 << 4),
+
+	/* Update ORIG_HEAD */
+	RESET_WORKING_TREE_UPDATE_ORIG_HEAD = (1 << 5),
 
 	/*
 	 * Perform a dry-run by performing the operation without updating
 	 * any user-visible state.
 	 */
-	RESET_WORKING_TREE_DRY_RUN = (1 << 5),
+	RESET_WORKING_TREE_DRY_RUN = (1 << 6),
 };
 
 struct reset_working_tree_options {
diff --git a/sequencer.c b/sequencer.c
index 4efe831178..e905b1b2d9 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -4678,7 +4678,8 @@ static void create_autostash_internal(struct repository *r,
 	    has_uncommitted_changes(r, 1)) {
 		struct child_process stash = CHILD_PROCESS_INIT;
 		struct reset_working_tree_options ropts = {
-			.flags = RESET_WORKING_TREE_HARD,
+			.flags = RESET_WORKING_TREE_HARD |
+				 RESET_WORKING_TREE_UPDATE_HEAD,
 		};
 		struct object_id oid;
 
@@ -4873,6 +4874,7 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts,
 		.oid = onto,
 		.orig_head = orig_head,
 		.flags = RESET_WORKING_TREE_DETACH |
+			 RESET_WORKING_TREE_UPDATE_HEAD |
 			 RESET_WORKING_TREE_UPDATE_ORIG_HEAD |
 			 RESET_WORKING_TREE_RUN_POST_CHECKOUT_HOOK,
 		.head_msg = reflog_message(opts, "start", "checkout %s",

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 07/10] reset: allow the caller to specify the current HEAD object
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

When calling `reset_working_tree()` we automatically derive the commit
that the caller wants to move from by reading the HEAD commit. Some
callers may already have resolved it, or they may want to move from a
different commit that doesn't match HEAD.

Introduce a new `oid_from` option that lets the caller specify the
commit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reset.c | 5 ++++-
 reset.h | 5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/reset.c b/reset.c
index 4bde5d8dc6..06f375f296 100644
--- a/reset.c
+++ b/reset.c
@@ -122,7 +122,10 @@ int reset_working_tree(struct repository *r,
 		goto leave_reset_head;
 	}
 
-	if (!repo_get_oid(r, "HEAD", &head_oid)) {
+	if (opts->oid_from) {
+		oidcpy(&head_oid, opts->oid_from);
+		head = &head_oid;
+	} else if (!repo_get_oid(r, "HEAD", &head_oid)) {
 		head = &head_oid;
 	} else if (!oid || !reset_hard) {
 		ret = error(_("could not determine HEAD revision"));
diff --git a/reset.h b/reset.h
index 38b2891b53..4c992ba671 100644
--- a/reset.h
+++ b/reset.h
@@ -37,6 +37,11 @@ struct reset_working_tree_options {
 	 * The commit to checkout/reset to. Defaults to HEAD.
 	 */
 	const struct object_id *oid;
+	/*
+	 * The commit to checkout/reset from when doing a two-way merge. This
+	 * is used as one of the sides to merge.
+	 */
+	const struct object_id *oid_from;
 	/*
 	 * Optional value to set ORIG_HEAD. Defaults to HEAD.
 	 */

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty


^ permalink raw reply related

* [PATCH v6 08/10] reset: stop assuming that the caller passes in a clean index
From: Patrick Steinhardt @ 2026-06-15 13:54 UTC (permalink / raw)
  To: git; +Cc: Pablo Sabater, Junio C Hamano, Kristoffer Haugsbakk, Phillip Wood
In-Reply-To: <20260615-b4-pks-history-drop-v6-0-2e329e536d78@pks.im>

In 652bd0211d (rebase: use 'skip_cache_tree_update' option, 2022-11-10),
we updated `reset_working_tree()` to stop updating the index tree cache.
This was done as a performance optimization: the function is only called
by "sequencer.c" and "rebase.c", both of which assume a clean index
before they perform their operation, so we know that the end result will
be a clean index, too. Consequently, we can skip recomputing the cache
as we can instead use `prime_cache_tree()` directly.

In a subsequent commit we're about to add a new caller though where the
assumption doesn't hold anymore: the index may be dirty before calling
`reset_working_tree()`, and consequently we cannot prime the cache with
a given tree anymore as the index and tree will mismatch.

Adapt the logic so that we only skip the cache tree update in case we're
doing a hard reset. While we could introduce logic that only skips the
update in case the incoming index was clean already, that doesn't really
feel worth it: after all, the mentioned commit says itself that the
performance improvement was negligible anyway.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reset.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/reset.c b/reset.c
index 06f375f296..ff87e3e357 100644
--- a/reset.c
+++ b/reset.c
@@ -167,10 +167,11 @@ int reset_working_tree(struct repository *r,
 	unpack_tree_opts.dry_run = dry_run;
 	unpack_tree_opts.merge = 1;
 	unpack_tree_opts.preserve_ignored = 0; /* FIXME: !overwrite_ignore */
-	unpack_tree_opts.skip_cache_tree_update = 1;
 	init_checkout_metadata(&unpack_tree_opts.meta, switch_to_branch, oid, NULL);
-	if (reset_hard)
+	if (reset_hard) {
+		unpack_tree_opts.skip_cache_tree_update = 1;
 		unpack_tree_opts.reset = UNPACK_RESET_PROTECT_UNTRACKED;
+	}

 	if (!reset_hard && !fill_tree_descriptor(r, &desc[nr++], &head_oid)) {
 		ret = error(_("failed to find tree of %s"),
@@ -197,7 +198,8 @@ int reset_working_tree(struct repository *r,
 		goto leave_reset_head;
 	}

-	prime_cache_tree(r, r->index, tree);
+	if (reset_hard)
+		prime_cache_tree(r, r->index, tree);

 	if (write_locked_index(r->index, &lock, COMMIT_LOCK) < 0) {
 		ret = error(_("could not write index"));

-- 
2.55.0.rc0.738.g0c8ab3ebcc.dirty

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox