Git development
 help / color / mirror / Atom feed
* [PATCH v2 10/18] odb/source-loose: wire up `count_objects()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_count_objects()` and its associated helpers from
"object-file.c" into "odb/source-loose.c" and wire it up as the
`count_objects()` callback of the loose source.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/gc.c       |  6 +++---
 object-file.c      | 60 -----------------------------------------------------
 object-file.h      | 14 -------------
 odb/source-files.c |  2 +-
 odb/source-loose.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 65 insertions(+), 78 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 84a66d3240..c26c93ee0f 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -466,6 +466,7 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED)
 
 static int too_many_loose_objects(int limit)
 {
+	struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
 	/*
 	 * This is weird, but stems from legacy behaviour: the GC auto
 	 * threshold was always essentially interpreted as if it was rounded up
@@ -474,9 +475,8 @@ static int too_many_loose_objects(int limit)
 	int auto_threshold = DIV_ROUND_UP(limit, 256) * 256;
 	unsigned long loose_count;
 
-	if (odb_source_loose_count_objects(the_repository->objects->sources,
-					   ODB_COUNT_OBJECTS_APPROXIMATE,
-					   &loose_count) < 0)
+	if (odb_source_count_objects(&files->loose->base, ODB_COUNT_OBJECTS_APPROXIMATE,
+				     &loose_count) < 0)
 		return 0;
 
 	return loose_count > auto_threshold;
diff --git a/object-file.c b/object-file.c
index 11957aa44f..9b2044de37 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1602,66 +1602,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
 	return r;
 }
 
-static int count_loose_object(const struct object_id *oid UNUSED,
-			      struct object_info *oi UNUSED,
-			      void *payload)
-{
-	unsigned long *count = payload;
-	(*count)++;
-	return 0;
-}
-
-int odb_source_loose_count_objects(struct odb_source *source,
-				   enum odb_count_objects_flags flags,
-				   unsigned long *out)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
-	char *path = NULL;
-	DIR *dir = NULL;
-	int ret;
-
-	if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) {
-		unsigned long count = 0;
-		struct dirent *ent;
-
-		path = xstrfmt("%s/17", source->path);
-
-		dir = opendir(path);
-		if (!dir) {
-			if (errno == ENOENT) {
-				*out = 0;
-				ret = 0;
-				goto out;
-			}
-
-			ret = error_errno("cannot open object shard '%s'", path);
-			goto out;
-		}
-
-		while ((ent = readdir(dir)) != NULL) {
-			if (strspn(ent->d_name, "0123456789abcdef") != hexsz ||
-			    ent->d_name[hexsz] != '\0')
-				continue;
-			count++;
-		}
-
-		*out = count * 256;
-		ret = 0;
-	} else {
-		struct odb_for_each_object_options opts = { 0 };
-		*out = 0;
-		ret = odb_source_for_each_object(&files->loose->base, NULL, count_loose_object,
-						 out, &opts);
-	}
-
-out:
-	if (dir)
-		closedir(dir);
-	free(path);
-	return ret;
-}
-
 static int check_stream_oid(git_zstream *stream,
 			    const char *hdr,
 			    unsigned long size,
diff --git a/object-file.h b/object-file.h
index 96760db0e1..bc72d89f54 100644
--- a/object-file.h
+++ b/object-file.h
@@ -96,20 +96,6 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 				each_loose_subdir_fn subdir_cb,
 				void *data);
 
-/*
- * Count the number of loose objects in this source.
- *
- * The object count is approximated by opening a single sharding directory for
- * loose objects and scanning its contents. The result is then extrapolated by
- * 256. This should generally work as a reasonable estimate given that the
- * object hash is supposed to be indistinguishable from random.
- *
- * Returns 0 on success, a negative error code otherwise.
- */
-int odb_source_loose_count_objects(struct odb_source *source,
-				   enum odb_count_objects_flags flags,
-				   unsigned long *out);
-
 /**
  * format_object_header() is a thin wrapper around s xsnprintf() that
  * writes the initial "<type> <obj-len>" part of the loose object
diff --git a/odb/source-files.c b/odb/source-files.c
index 4a54b10e4a..d5454e170d 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -109,7 +109,7 @@ static int odb_source_files_count_objects(struct odb_source *source,
 	if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) {
 		unsigned long loose_count;
 
-		ret = odb_source_loose_count_objects(source, flags, &loose_count);
+		ret = odb_source_count_objects(&files->loose->base, flags, &loose_count);
 		if (ret < 0)
 			goto out;
 
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 4b8d10bc87..27be066327 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -520,6 +520,66 @@ static int odb_source_loose_find_abbrev_len(struct odb_source *source,
 	return ret;
 }
 
+static int count_loose_object(const struct object_id *oid UNUSED,
+			      struct object_info *oi UNUSED,
+			      void *payload)
+{
+	unsigned long *count = payload;
+	(*count)++;
+	return 0;
+}
+
+static int odb_source_loose_count_objects(struct odb_source *source,
+					  enum odb_count_objects_flags flags,
+					  unsigned long *out)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
+	char *path = NULL;
+	DIR *dir = NULL;
+	int ret;
+
+	if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) {
+		unsigned long count = 0;
+		struct dirent *ent;
+
+		path = xstrfmt("%s/17", source->path);
+
+		dir = opendir(path);
+		if (!dir) {
+			if (errno == ENOENT) {
+				*out = 0;
+				ret = 0;
+				goto out;
+			}
+
+			ret = error_errno("cannot open object shard '%s'", path);
+			goto out;
+		}
+
+		while ((ent = readdir(dir)) != NULL) {
+			if (strspn(ent->d_name, "0123456789abcdef") != hexsz ||
+			    ent->d_name[hexsz] != '\0')
+				continue;
+			count++;
+		}
+
+		*out = count * 256;
+		ret = 0;
+	} else {
+		struct odb_for_each_object_options opts = { 0 };
+		*out = 0;
+		ret = odb_source_for_each_object(&loose->base, NULL, count_loose_object,
+						 out, &opts);
+	}
+
+out:
+	if (dir)
+		closedir(dir);
+	free(path);
+	return ret;
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -577,6 +637,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.read_object_stream = odb_source_loose_read_object_stream;
 	loose->base.for_each_object = odb_source_loose_for_each_object;
 	loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len;
+	loose->base.count_objects = odb_source_loose_count_objects;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 09/18] odb/source-loose: wire up `find_abbrev_len()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_find_abbrev_len()` and its associated helpers
from "object-file.c" into "odb/source-loose.c" and wire it up as the
`find_abbrev_len()` callback of the loose source.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 39 ---------------------------------------
 object-file.h      | 12 ------------
 odb/source-files.c |  2 +-
 odb/source-loose.c | 40 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/object-file.c b/object-file.c
index 157ecad3ea..11957aa44f 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1662,45 +1662,6 @@ int odb_source_loose_count_objects(struct odb_source *source,
 	return ret;
 }
 
-struct find_abbrev_len_data {
-	const struct object_id *oid;
-	unsigned len;
-};
-
-static int find_abbrev_len_cb(const struct object_id *oid,
-			      struct object_info *oi UNUSED,
-			      void *cb_data)
-{
-	struct find_abbrev_len_data *data = cb_data;
-	unsigned len = oid_common_prefix_hexlen(oid, data->oid);
-	if (len != hash_algos[oid->algo].hexsz && len >= data->len)
-		data->len = len + 1;
-	return 0;
-}
-
-int odb_source_loose_find_abbrev_len(struct odb_source *source,
-				     const struct object_id *oid,
-				     unsigned min_len,
-				     unsigned *out)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	struct odb_for_each_object_options opts = {
-		.prefix = oid,
-		.prefix_hex_len = min_len,
-	};
-	struct find_abbrev_len_data data = {
-		.oid = oid,
-		.len = min_len,
-	};
-	int ret;
-
-	ret = odb_source_for_each_object(&files->loose->base, NULL, find_abbrev_len_cb,
-					 &data, &opts);
-	*out = data.len;
-
-	return ret;
-}
-
 static int check_stream_oid(git_zstream *stream,
 			    const char *hdr,
 			    unsigned long size,
diff --git a/object-file.h b/object-file.h
index 9ee5649220..96760db0e1 100644
--- a/object-file.h
+++ b/object-file.h
@@ -110,18 +110,6 @@ int odb_source_loose_count_objects(struct odb_source *source,
 				   enum odb_count_objects_flags flags,
 				   unsigned long *out);
 
-/*
- * Find the shortest unique prefix for the given object ID, where `min_len` is
- * the minimum length that the prefix should have.
- *
- * Returns 0 on success, in which case the computed length will be written to
- * `out`. Otherwise, a negative error code is returned.
- */
-int odb_source_loose_find_abbrev_len(struct odb_source *source,
-				     const struct object_id *oid,
-				     unsigned min_len,
-				     unsigned *out);
-
 /**
  * format_object_header() is a thin wrapper around s xsnprintf() that
  * writes the initial "<type> <obj-len>" part of the loose object
diff --git a/odb/source-files.c b/odb/source-files.c
index 676a641739..4a54b10e4a 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -136,7 +136,7 @@ static int odb_source_files_find_abbrev_len(struct odb_source *source,
 	if (ret < 0)
 		goto out;
 
-	ret = odb_source_loose_find_abbrev_len(source, oid, len, &len);
+	ret = odb_source_find_abbrev_len(&files->loose->base, oid, len, &len);
 	if (ret < 0)
 		goto out;
 
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 4e8b923498..4b8d10bc87 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -481,6 +481,45 @@ static int odb_source_loose_for_each_object(struct odb_source *source,
 					     NULL, NULL, &data);
 }
 
+struct find_abbrev_len_data {
+	const struct object_id *oid;
+	unsigned len;
+};
+
+static int find_abbrev_len_cb(const struct object_id *oid,
+			      struct object_info *oi UNUSED,
+			      void *cb_data)
+{
+	struct find_abbrev_len_data *data = cb_data;
+	unsigned len = oid_common_prefix_hexlen(oid, data->oid);
+	if (len != hash_algos[oid->algo].hexsz && len >= data->len)
+		data->len = len + 1;
+	return 0;
+}
+
+static int odb_source_loose_find_abbrev_len(struct odb_source *source,
+					    const struct object_id *oid,
+					    unsigned min_len,
+					    unsigned *out)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	struct odb_for_each_object_options opts = {
+		.prefix = oid,
+		.prefix_hex_len = min_len,
+	};
+	struct find_abbrev_len_data data = {
+		.oid = oid,
+		.len = min_len,
+	};
+	int ret;
+
+	ret = odb_source_for_each_object(&loose->base, NULL, find_abbrev_len_cb,
+					 &data, &opts);
+	*out = data.len;
+
+	return ret;
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -537,6 +576,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.read_object_info = odb_source_loose_read_object_info;
 	loose->base.read_object_stream = odb_source_loose_read_object_stream;
 	loose->base.for_each_object = odb_source_loose_for_each_object;
+	loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 08/18] odb/source-loose: wire up `for_each_object()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_for_each_object()` and its associated helpers
from "object-file.c" into "odb/source-loose.c" and wire it up as the
`for_each_object()` callback of the loose source.

Again, as in the preceding commit, we are forced to expose a couple of
functions from "object-file.c" that are now used by both subsystems.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/cat-file.c |   5 +-
 object-file.c      | 299 +++--------------------------------------------------
 object-file.h      |  32 +++---
 odb/source-files.c |   2 +-
 odb/source-loose.c | 264 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 297 insertions(+), 305 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index d9fbad5358..2958fc5357 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -862,8 +862,9 @@ static void batch_each_object(struct batch_options *opt,
 	 */
 	odb_prepare_alternates(the_repository->objects);
 	for (source = the_repository->objects->sources; source; source = source->next) {
-		int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
-							   &payload, &opts);
+		struct odb_source_files *files = odb_source_files_downcast(source);
+		int ret = odb_source_for_each_object(&files->loose->base, NULL, batch_one_object_oi,
+						     &payload, &opts);
 		if (ret)
 			break;
 	}
diff --git a/object-file.c b/object-file.c
index adfb672493..157ecad3ea 100644
--- a/object-file.c
+++ b/object-file.c
@@ -22,7 +22,6 @@
 #include "odb.h"
 #include "odb/streaming.h"
 #include "odb/transaction.h"
-#include "oidtree.h"
 #include "pack.h"
 #include "packfile.h"
 #include "path.h"
@@ -31,12 +30,6 @@
 #include "tempfile.h"
 #include "tmp-objdir.h"
 
-/* The maximum size for an object header. */
-#define MAX_HEADER_LEN 32
-
-static struct oidtree *odb_source_loose_cache(struct odb_source *source,
-					      const struct object_id *oid);
-
 static int get_conv_flags(unsigned flags)
 {
 	if (flags & INDEX_RENORMALIZE)
@@ -164,12 +157,6 @@ int stream_object_signature(struct repository *r,
 	return !oideq(oid, &real_oid) ? -1 : 0;
 }
 
-static int quick_has_loose(struct odb_source_loose *loose,
-			   const struct object_id *oid)
-{
-	return !!oidtree_contains(odb_source_loose_cache(&loose->files->base, oid), oid);
-}
-
 /*
  * Map and close the given loose object fd. The path argument is used for
  * error reporting.
@@ -227,9 +214,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 	return ULHR_TOO_LONG;
 }
 
-static void *unpack_loose_rest(git_zstream *stream,
-			       void *buffer, unsigned long size,
-			       const struct object_id *oid)
+void *unpack_loose_rest(git_zstream *stream,
+			void *buffer, unsigned long size,
+			const struct object_id *oid)
 {
 	size_t bytes = strlen(buffer) + 1, n;
 	unsigned char *buf = xmallocz(size);
@@ -343,149 +330,6 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
 	return 0;
 }
 
-int read_object_info_from_path(struct odb_source_loose *loose,
-			       const char *path,
-			       const struct object_id *oid,
-			       struct object_info *oi,
-			       enum object_info_flags flags)
-{
-	int ret;
-	int fd;
-	unsigned long mapsize;
-	void *map = NULL;
-	git_zstream stream, *stream_to_end = NULL;
-	char hdr[MAX_HEADER_LEN];
-	unsigned long size_scratch;
-	enum object_type type_scratch;
-	struct stat st;
-
-	/*
-	 * If we don't care about type or size, then we don't
-	 * need to look inside the object at all. Note that we
-	 * do not optimize out the stat call, even if the
-	 * caller doesn't care about the disk-size, since our
-	 * return value implicitly indicates whether the
-	 * object even exists.
-	 */
-	if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
-		struct stat st;
-
-		if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
-			ret = quick_has_loose(loose, oid) ? 0 : -1;
-			goto out;
-		}
-
-		if (lstat(path, &st) < 0) {
-			ret = -1;
-			goto out;
-		}
-
-		if (oi) {
-			if (oi->disk_sizep)
-				*oi->disk_sizep = st.st_size;
-			if (oi->mtimep)
-				*oi->mtimep = st.st_mtime;
-		}
-
-		ret = 0;
-		goto out;
-	}
-
-	fd = git_open(path);
-	if (fd < 0) {
-		if (errno != ENOENT)
-			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
-		ret = -1;
-		goto out;
-	}
-
-	if (fstat(fd, &st)) {
-		close(fd);
-		ret = -1;
-		goto out;
-	}
-
-	mapsize = xsize_t(st.st_size);
-	if (!mapsize) {
-		close(fd);
-		ret = error(_("object file %s is empty"), path);
-		goto out;
-	}
-
-	map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
-	close(fd);
-	if (!map) {
-		ret = -1;
-		goto out;
-	}
-
-	if (oi->disk_sizep)
-		*oi->disk_sizep = mapsize;
-	if (oi->mtimep)
-		*oi->mtimep = st.st_mtime;
-
-	stream_to_end = &stream;
-
-	switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
-	case ULHR_OK:
-		if (!oi->sizep)
-			oi->sizep = &size_scratch;
-		if (!oi->typep)
-			oi->typep = &type_scratch;
-
-		if (parse_loose_header(hdr, oi) < 0) {
-			ret = error(_("unable to parse %s header"), oid_to_hex(oid));
-			goto corrupt;
-		}
-
-		if (*oi->typep < 0)
-			die(_("invalid object type"));
-
-		if (oi->contentp) {
-			*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
-			if (!*oi->contentp) {
-				ret = -1;
-				goto corrupt;
-			}
-		}
-
-		break;
-	case ULHR_BAD:
-		ret = error(_("unable to unpack %s header"),
-			    oid_to_hex(oid));
-		goto corrupt;
-	case ULHR_TOO_LONG:
-		ret = error(_("header for %s too long, exceeds %d bytes"),
-			    oid_to_hex(oid), MAX_HEADER_LEN);
-		goto corrupt;
-	}
-
-	ret = 0;
-
-corrupt:
-	if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
-		die(_("loose object %s (stored in %s) is corrupt"),
-		    oid_to_hex(oid), path);
-
-out:
-	if (stream_to_end)
-		git_inflate_end(stream_to_end);
-	if (map)
-		munmap(map, mapsize);
-	if (oi) {
-		if (oi->sizep == &size_scratch)
-			oi->sizep = NULL;
-		if (oi->typep == &type_scratch)
-			oi->typep = NULL;
-		if (oi->delta_base_oid)
-			oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo);
-		if (!ret)
-			oi->whence = OI_LOOSE;
-	}
-
-	return ret;
-}
-
 static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
 			     const void *buf, unsigned long len,
 			     struct object_id *oid,
@@ -1667,13 +1511,13 @@ int read_pack_header(int fd, struct pack_header *header)
 	return 0;
 }
 
-static int for_each_file_in_obj_subdir(unsigned int subdir_nr,
-				       struct strbuf *path,
-				       const struct git_hash_algo *algop,
-				       each_loose_object_fn obj_cb,
-				       each_loose_cruft_fn cruft_cb,
-				       each_loose_subdir_fn subdir_cb,
-				       void *data)
+int for_each_file_in_obj_subdir(unsigned int subdir_nr,
+				struct strbuf *path,
+				const struct git_hash_algo *algop,
+				each_loose_object_fn obj_cb,
+				each_loose_cruft_fn cruft_cb,
+				each_loose_subdir_fn subdir_cb,
+				void *data)
 {
 	size_t origlen, baselen;
 	DIR *dir;
@@ -1758,78 +1602,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
 	return r;
 }
 
-struct for_each_object_wrapper_data {
-	struct odb_source_loose *loose;
-	const struct object_info *request;
-	odb_for_each_object_cb cb;
-	void *cb_data;
-};
-
-static int for_each_object_wrapper_cb(const struct object_id *oid,
-				      const char *path,
-				      void *cb_data)
-{
-	struct for_each_object_wrapper_data *data = cb_data;
-
-	if (data->request) {
-		struct object_info oi = *data->request;
-
-		if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0)
-			return -1;
-
-		return data->cb(oid, &oi, data->cb_data);
-	} else {
-		return data->cb(oid, NULL, data->cb_data);
-	}
-}
-
-static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
-					       void *node_data UNUSED,
-					       void *cb_data)
-{
-	struct for_each_object_wrapper_data *data = cb_data;
-	if (data->request) {
-		struct object_info oi = *data->request;
-
-		if (odb_source_read_object_info(&data->loose->base,
-						oid, &oi, 0) < 0)
-			return -1;
-
-		return data->cb(oid, &oi, data->cb_data);
-	} else {
-		return data->cb(oid, NULL, data->cb_data);
-	}
-}
-
-int odb_source_loose_for_each_object(struct odb_source *source,
-				     const struct object_info *request,
-				     odb_for_each_object_cb cb,
-				     void *cb_data,
-				     const struct odb_for_each_object_options *opts)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	struct for_each_object_wrapper_data data = {
-		.loose = files->loose,
-		.request = request,
-		.cb = cb,
-		.cb_data = cb_data,
-	};
-
-	/* There are no loose promisor objects, so we can return immediately. */
-	if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
-		return 0;
-	if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
-		return 0;
-
-	if (opts->prefix)
-		return oidtree_each(odb_source_loose_cache(source, opts->prefix),
-				    opts->prefix, opts->prefix_hex_len,
-				    for_each_prefixed_object_wrapper_cb, &data);
-
-	return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
-					     NULL, NULL, &data);
-}
-
 static int count_loose_object(const struct object_id *oid UNUSED,
 			      struct object_info *oi UNUSED,
 			      void *payload)
@@ -1843,6 +1615,7 @@ int odb_source_loose_count_objects(struct odb_source *source,
 				   enum odb_count_objects_flags flags,
 				   unsigned long *out)
 {
+	struct odb_source_files *files = odb_source_files_downcast(source);
 	const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
 	char *path = NULL;
 	DIR *dir = NULL;
@@ -1878,8 +1651,8 @@ int odb_source_loose_count_objects(struct odb_source *source,
 	} else {
 		struct odb_for_each_object_options opts = { 0 };
 		*out = 0;
-		ret = odb_source_loose_for_each_object(source, NULL, count_loose_object,
-						       out, &opts);
+		ret = odb_source_for_each_object(&files->loose->base, NULL, count_loose_object,
+						 out, &opts);
 	}
 
 out:
@@ -1910,6 +1683,7 @@ int odb_source_loose_find_abbrev_len(struct odb_source *source,
 				     unsigned min_len,
 				     unsigned *out)
 {
+	struct odb_source_files *files = odb_source_files_downcast(source);
 	struct odb_for_each_object_options opts = {
 		.prefix = oid,
 		.prefix_hex_len = min_len,
@@ -1920,54 +1694,13 @@ int odb_source_loose_find_abbrev_len(struct odb_source *source,
 	};
 	int ret;
 
-	ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb,
-					       &data, &opts);
+	ret = odb_source_for_each_object(&files->loose->base, NULL, find_abbrev_len_cb,
+					 &data, &opts);
 	*out = data.len;
 
 	return ret;
 }
 
-static int append_loose_object(const struct object_id *oid,
-			       const char *path UNUSED,
-			       void *data)
-{
-	oidtree_insert(data, oid, NULL);
-	return 0;
-}
-
-static struct oidtree *odb_source_loose_cache(struct odb_source *source,
-					      const struct object_id *oid)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	int subdir_nr = oid->hash[0];
-	struct strbuf buf = STRBUF_INIT;
-	size_t word_bits = bitsizeof(files->loose->subdir_seen[0]);
-	size_t word_index = subdir_nr / word_bits;
-	size_t mask = (size_t)1u << (subdir_nr % word_bits);
-	uint32_t *bitmap;
-
-	if (subdir_nr < 0 ||
-	    (size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen))
-		BUG("subdir_nr out of range");
-
-	bitmap = &files->loose->subdir_seen[word_index];
-	if (*bitmap & mask)
-		return files->loose->cache;
-	if (!files->loose->cache) {
-		ALLOC_ARRAY(files->loose->cache, 1);
-		oidtree_init(files->loose->cache);
-	}
-	strbuf_addstr(&buf, source->path);
-	for_each_file_in_obj_subdir(subdir_nr, &buf,
-				    source->odb->repo->hash_algo,
-				    append_loose_object,
-				    NULL, NULL,
-				    files->loose->cache);
-	*bitmap |= mask;
-	strbuf_release(&buf);
-	return files->loose->cache;
-}
-
 static int check_stream_oid(git_zstream *stream,
 			    const char *hdr,
 			    unsigned long size,
diff --git a/object-file.h b/object-file.h
index d93b7ffad7..9ee5649220 100644
--- a/object-file.h
+++ b/object-file.h
@@ -6,6 +6,9 @@
 #include "odb.h"
 #include "odb/source-loose.h"
 
+/* The maximum size for an object header. */
+#define MAX_HEADER_LEN 32
+
 struct index_state;
 
 enum {
@@ -85,19 +88,13 @@ int for_each_loose_file_in_source(struct odb_source *source,
 				  each_loose_cruft_fn cruft_cb,
 				  each_loose_subdir_fn subdir_cb,
 				  void *data);
-
-/*
- * Iterate through all loose objects in the given object database source and
- * invoke the callback function for each of them. If an object info request is
- * given, then the object info will be read for every individual object and
- * passed to the callback as if `odb_source_loose_read_object_info()` was
- * called for the object.
- */
-int odb_source_loose_for_each_object(struct odb_source *source,
-				     const struct object_info *request,
-				     odb_for_each_object_cb cb,
-				     void *cb_data,
-				     const struct odb_for_each_object_options *opts);
+int for_each_file_in_obj_subdir(unsigned int subdir_nr,
+				struct strbuf *path,
+				const struct git_hash_algo *algop,
+				each_loose_object_fn obj_cb,
+				each_loose_cruft_fn cruft_cb,
+				each_loose_subdir_fn subdir_cb,
+				void *data);
 
 /*
  * Count the number of loose objects in this source.
@@ -188,12 +185,6 @@ int read_loose_object(struct repository *repo,
 		      void **contents,
 		      struct object_info *oi);
 
-int read_object_info_from_path(struct odb_source_loose *loose,
-			       const char *path,
-			       const struct object_id *oid,
-			       struct object_info *oi,
-			       enum object_info_flags flags);
-
 enum unpack_loose_header_result {
 	ULHR_OK,
 	ULHR_BAD,
@@ -217,6 +208,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 						    unsigned long mapsize,
 						    void *buffer,
 						    unsigned long bufsiz);
+void *unpack_loose_rest(git_zstream *stream,
+			void *buffer, unsigned long size,
+			const struct object_id *oid);
 
 int parse_loose_header(const char *hdr, struct object_info *oi);
 
diff --git a/odb/source-files.c b/odb/source-files.c
index 90806ddf86..676a641739 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -82,7 +82,7 @@ static int odb_source_files_for_each_object(struct odb_source *source,
 	int ret;
 
 	if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
-		ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts);
+		ret = odb_source_for_each_object(&files->loose->base, request, cb, cb_data, opts);
 		if (ret)
 			return ret;
 	}
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 4b82c6f316..4e8b923498 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -2,6 +2,7 @@
 #include "abspath.h"
 #include "chdir-notify.h"
 #include "gettext.h"
+#include "hex.h"
 #include "loose.h"
 #include "object-file.h"
 #include "odb.h"
@@ -9,8 +10,198 @@
 #include "odb/source-loose.h"
 #include "odb/streaming.h"
 #include "oidtree.h"
+#include "repository.h"
 #include "strbuf.h"
 
+static int append_loose_object(const struct object_id *oid,
+			       const char *path UNUSED,
+			       void *data)
+{
+	oidtree_insert(data, oid, NULL);
+	return 0;
+}
+
+static struct oidtree *odb_source_loose_cache(struct odb_source_loose *loose,
+					      const struct object_id *oid)
+{
+	int subdir_nr = oid->hash[0];
+	struct strbuf buf = STRBUF_INIT;
+	size_t word_bits = bitsizeof(loose->subdir_seen[0]);
+	size_t word_index = subdir_nr / word_bits;
+	size_t mask = (size_t)1u << (subdir_nr % word_bits);
+	uint32_t *bitmap;
+
+	if (subdir_nr < 0 ||
+	    (size_t) subdir_nr >= bitsizeof(loose->subdir_seen))
+		BUG("subdir_nr out of range");
+
+	bitmap = &loose->subdir_seen[word_index];
+	if (*bitmap & mask)
+		return loose->cache;
+	if (!loose->cache) {
+		ALLOC_ARRAY(loose->cache, 1);
+		oidtree_init(loose->cache);
+	}
+	strbuf_addstr(&buf, loose->base.path);
+	for_each_file_in_obj_subdir(subdir_nr, &buf,
+				    loose->base.odb->repo->hash_algo,
+				    append_loose_object,
+				    NULL, NULL,
+				    loose->cache);
+	*bitmap |= mask;
+	strbuf_release(&buf);
+	return loose->cache;
+}
+
+static int quick_has_loose(struct odb_source_loose *loose,
+			   const struct object_id *oid)
+{
+	return !!oidtree_contains(odb_source_loose_cache(loose, oid), oid);
+}
+
+static int read_object_info_from_path(struct odb_source_loose *loose,
+				      const char *path,
+				      const struct object_id *oid,
+				      struct object_info *oi,
+				      enum object_info_flags flags)
+{
+	int ret;
+	int fd;
+	unsigned long mapsize;
+	void *map = NULL;
+	git_zstream stream, *stream_to_end = NULL;
+	char hdr[MAX_HEADER_LEN];
+	unsigned long size_scratch;
+	enum object_type type_scratch;
+	struct stat st;
+
+	/*
+	 * If we don't care about type or size, then we don't
+	 * need to look inside the object at all. Note that we
+	 * do not optimize out the stat call, even if the
+	 * caller doesn't care about the disk-size, since our
+	 * return value implicitly indicates whether the
+	 * object even exists.
+	 */
+	if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
+		struct stat st;
+
+		if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
+			ret = quick_has_loose(loose, oid) ? 0 : -1;
+			goto out;
+		}
+
+		if (lstat(path, &st) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		if (oi) {
+			if (oi->disk_sizep)
+				*oi->disk_sizep = st.st_size;
+			if (oi->mtimep)
+				*oi->mtimep = st.st_mtime;
+		}
+
+		ret = 0;
+		goto out;
+	}
+
+	fd = git_open(path);
+	if (fd < 0) {
+		if (errno != ENOENT)
+			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
+		ret = -1;
+		goto out;
+	}
+
+	if (fstat(fd, &st)) {
+		close(fd);
+		ret = -1;
+		goto out;
+	}
+
+	mapsize = xsize_t(st.st_size);
+	if (!mapsize) {
+		close(fd);
+		ret = error(_("object file %s is empty"), path);
+		goto out;
+	}
+
+	map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (!map) {
+		ret = -1;
+		goto out;
+	}
+
+	if (oi->disk_sizep)
+		*oi->disk_sizep = mapsize;
+	if (oi->mtimep)
+		*oi->mtimep = st.st_mtime;
+
+	stream_to_end = &stream;
+
+	switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
+	case ULHR_OK:
+		if (!oi->sizep)
+			oi->sizep = &size_scratch;
+		if (!oi->typep)
+			oi->typep = &type_scratch;
+
+		if (parse_loose_header(hdr, oi) < 0) {
+			ret = error(_("unable to parse %s header"), oid_to_hex(oid));
+			goto corrupt;
+		}
+
+		if (*oi->typep < 0)
+			die(_("invalid object type"));
+
+		if (oi->contentp) {
+			*oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
+			if (!*oi->contentp) {
+				ret = -1;
+				goto corrupt;
+			}
+		}
+
+		break;
+	case ULHR_BAD:
+		ret = error(_("unable to unpack %s header"),
+			    oid_to_hex(oid));
+		goto corrupt;
+	case ULHR_TOO_LONG:
+		ret = error(_("header for %s too long, exceeds %d bytes"),
+			    oid_to_hex(oid), MAX_HEADER_LEN);
+		goto corrupt;
+	}
+
+	ret = 0;
+
+corrupt:
+	if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
+		die(_("loose object %s (stored in %s) is corrupt"),
+		    oid_to_hex(oid), path);
+
+out:
+	if (stream_to_end)
+		git_inflate_end(stream_to_end);
+	if (map)
+		munmap(map, mapsize);
+	if (oi) {
+		if (oi->sizep == &size_scratch)
+			oi->sizep = NULL;
+		if (oi->typep == &type_scratch)
+			oi->typep = NULL;
+		if (oi->delta_base_oid)
+			oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo);
+		if (!ret)
+			oi->whence = OI_LOOSE;
+	}
+
+	return ret;
+}
+
 static int odb_source_loose_read_object_info(struct odb_source *source,
 					     const struct object_id *oid,
 					     struct object_info *oi,
@@ -218,6 +409,78 @@ static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 	return -1;
 }
 
+struct for_each_object_wrapper_data {
+	struct odb_source_loose *loose;
+	const struct object_info *request;
+	odb_for_each_object_cb cb;
+	void *cb_data;
+};
+
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+				      const char *path,
+				      void *cb_data)
+{
+	struct for_each_object_wrapper_data *data = cb_data;
+
+	if (data->request) {
+		struct object_info oi = *data->request;
+
+		if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0)
+			return -1;
+
+		return data->cb(oid, &oi, data->cb_data);
+	} else {
+		return data->cb(oid, NULL, data->cb_data);
+	}
+}
+
+static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
+					       void *node_data UNUSED,
+					       void *cb_data)
+{
+	struct for_each_object_wrapper_data *data = cb_data;
+	if (data->request) {
+		struct object_info oi = *data->request;
+
+		if (odb_source_read_object_info(&data->loose->base,
+						oid, &oi, 0) < 0)
+			return -1;
+
+		return data->cb(oid, &oi, data->cb_data);
+	} else {
+		return data->cb(oid, NULL, data->cb_data);
+	}
+}
+
+static int odb_source_loose_for_each_object(struct odb_source *source,
+					    const struct object_info *request,
+					    odb_for_each_object_cb cb,
+					    void *cb_data,
+					    const struct odb_for_each_object_options *opts)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	struct for_each_object_wrapper_data data = {
+		.loose = loose,
+		.request = request,
+		.cb = cb,
+		.cb_data = cb_data,
+	};
+
+	/* There are no loose promisor objects, so we can return immediately. */
+	if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+		return 0;
+	if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+		return 0;
+
+	if (opts->prefix)
+		return oidtree_each(odb_source_loose_cache(loose, opts->prefix),
+				    opts->prefix, opts->prefix_hex_len,
+				    for_each_prefixed_object_wrapper_cb, &data);
+
+	return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+					     NULL, NULL, &data);
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -273,6 +536,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.reprepare = odb_source_loose_reprepare;
 	loose->base.read_object_info = odb_source_loose_read_object_info;
 	loose->base.read_object_stream = odb_source_loose_read_object_stream;
+	loose->base.for_each_object = odb_source_loose_for_each_object;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 07/18] odb/source-loose: wire up `read_object_stream()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_read_object_stream()` and its associated helpers
from "object-file.c" into "odb/source-loose.c" and wire it up as the
`read_object_stream()` callback of the loose source.

As part of the move we are also forced to expose a couple of functions
in "object-file.h" that parse object headers in a somewhat generic way,
as those functions are now used by both subsystems.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 200 ++---------------------------------------------------
 object-file.h      |  31 +++++++--
 odb/source-files.c |   2 +-
 odb/source-loose.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 222 insertions(+), 200 deletions(-)

diff --git a/object-file.c b/object-file.c
index fa174512a4..adfb672493 100644
--- a/object-file.c
+++ b/object-file.c
@@ -164,28 +164,6 @@ int stream_object_signature(struct repository *r,
 	return !oideq(oid, &real_oid) ? -1 : 0;
 }
 
-/*
- * Find "oid" as a loose object in given source, open the object and return its
- * file descriptor. Returns the file descriptor on success, negative on failure.
- *
- * The "path" out-parameter will give the path of the object we found (if any).
- * Note that it may point to static storage and is only valid until another
- * call to stat_loose_object().
- */
-static int open_loose_object(struct odb_source_loose *loose,
-			     const struct object_id *oid, const char **path)
-{
-	static struct strbuf buf = STRBUF_INIT;
-	int fd;
-
-	*path = odb_loose_path(&loose->files->base, &buf, oid);
-	fd = git_open(*path);
-	if (fd >= 0)
-		return fd;
-
-	return -1;
-}
-
 static int quick_has_loose(struct odb_source_loose *loose,
 			   const struct object_id *oid)
 {
@@ -215,42 +193,11 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
 	return map;
 }
 
-static void *odb_source_loose_map_object(struct odb_source *source,
-					 const struct object_id *oid,
-					 unsigned long *size)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	const char *p;
-	int fd = open_loose_object(files->loose, oid, &p);
-
-	if (fd < 0)
-		return NULL;
-	return map_fd(fd, p, size);
-}
-
-enum unpack_loose_header_result {
-	ULHR_OK,
-	ULHR_BAD,
-	ULHR_TOO_LONG,
-};
-
-/**
- * unpack_loose_header() initializes the data stream needed to unpack
- * a loose object header.
- *
- * Returns:
- *
- * - ULHR_OK on success
- * - ULHR_BAD on error
- * - ULHR_TOO_LONG if the header was too long
- *
- * It will only parse up to MAX_HEADER_LEN bytes.
- */
-static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-							   unsigned char *map,
-							   unsigned long mapsize,
-							   void *buffer,
-							   unsigned long bufsiz)
+enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+						    unsigned char *map,
+						    unsigned long mapsize,
+						    void *buffer,
+						    unsigned long bufsiz)
 {
 	int status;
 
@@ -340,7 +287,7 @@ static void *unpack_loose_rest(git_zstream *stream,
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-static int parse_loose_header(const char *hdr, struct object_info *oi)
+int parse_loose_header(const char *hdr, struct object_info *oi)
 {
 	const char *type_buf = hdr;
 	size_t size;
@@ -2170,138 +2117,3 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
 
 	return &transaction->base;
 }
-
-struct odb_loose_read_stream {
-	struct odb_read_stream base;
-	git_zstream z;
-	enum {
-		ODB_LOOSE_READ_STREAM_INUSE,
-		ODB_LOOSE_READ_STREAM_DONE,
-		ODB_LOOSE_READ_STREAM_ERROR,
-	} z_state;
-	void *mapped;
-	unsigned long mapsize;
-	char hdr[32];
-	int hdr_avail;
-	int hdr_used;
-};
-
-static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
-{
-	struct odb_loose_read_stream *st =
-		container_of(_st, struct odb_loose_read_stream, base);
-	size_t total_read = 0;
-
-	switch (st->z_state) {
-	case ODB_LOOSE_READ_STREAM_DONE:
-		return 0;
-	case ODB_LOOSE_READ_STREAM_ERROR:
-		return -1;
-	default:
-		break;
-	}
-
-	if (st->hdr_used < st->hdr_avail) {
-		size_t to_copy = st->hdr_avail - st->hdr_used;
-		if (sz < to_copy)
-			to_copy = sz;
-		memcpy(buf, st->hdr + st->hdr_used, to_copy);
-		st->hdr_used += to_copy;
-		total_read += to_copy;
-	}
-
-	while (total_read < sz) {
-		int status;
-
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		status = git_inflate(&st->z, Z_FINISH);
-
-		total_read = st->z.next_out - (unsigned char *)buf;
-
-		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = ODB_LOOSE_READ_STREAM_DONE;
-			break;
-		}
-		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-			git_inflate_end(&st->z);
-			st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
-			return -1;
-		}
-	}
-	return total_read;
-}
-
-static int close_istream_loose(struct odb_read_stream *_st)
-{
-	struct odb_loose_read_stream *st =
-		container_of(_st, struct odb_loose_read_stream, base);
-
-	if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
-		git_inflate_end(&st->z);
-	munmap(st->mapped, st->mapsize);
-	return 0;
-}
-
-int odb_source_loose_read_object_stream(struct odb_read_stream **out,
-					struct odb_source *source,
-					const struct object_id *oid)
-{
-	struct object_info oi = OBJECT_INFO_INIT;
-	struct odb_loose_read_stream *st;
-	unsigned long mapsize;
-	unsigned long size_ul;
-	void *mapped;
-
-	mapped = odb_source_loose_map_object(source, oid, &mapsize);
-	if (!mapped)
-		return -1;
-
-	/*
-	 * Note: we must allocate this structure early even though we may still
-	 * fail. This is because we need to initialize the zlib stream, and it
-	 * is not possible to copy the stream around after the fact because it
-	 * has self-referencing pointers.
-	 */
-	CALLOC_ARRAY(st, 1);
-
-	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
-				    sizeof(st->hdr))) {
-	case ULHR_OK:
-		break;
-	case ULHR_BAD:
-	case ULHR_TOO_LONG:
-		goto error;
-	}
-
-	/*
-	 * object_info.sizep is unsigned long* (32-bit on Windows), but
-	 * st->base.size is size_t (64-bit). Use temporary variable.
-	 * Note: loose objects >4GB would still truncate here, but such
-	 * large loose objects are uncommon (they'd normally be packed).
-	 */
-	oi.sizep = &size_ul;
-	oi.typep = &st->base.type;
-
-	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
-		goto error;
-	st->base.size = size_ul;
-
-	st->mapped = mapped;
-	st->mapsize = mapsize;
-	st->hdr_used = strlen(st->hdr) + 1;
-	st->hdr_avail = st->z.total_out;
-	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
-	st->base.close = close_istream_loose;
-	st->base.read = read_istream_loose;
-
-	*out = &st->base;
-
-	return 0;
-error:
-	git_inflate_end(&st->z);
-	munmap(mapped, mapsize);
-	free(st);
-	return -1;
-}
diff --git a/object-file.h b/object-file.h
index 8ac2832dac..d93b7ffad7 100644
--- a/object-file.h
+++ b/object-file.h
@@ -18,13 +18,8 @@ int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct s
 int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
 
 struct object_info;
-struct odb_read_stream;
 struct odb_source;
 
-int odb_source_loose_read_object_stream(struct odb_read_stream **out,
-					struct odb_source *source,
-					const struct object_id *oid);
-
 /*
  * Return true iff an object database source has a loose object
  * with the specified name.  This function does not respect replace
@@ -199,6 +194,32 @@ int read_object_info_from_path(struct odb_source_loose *loose,
 			       struct object_info *oi,
 			       enum object_info_flags flags);
 
+enum unpack_loose_header_result {
+	ULHR_OK,
+	ULHR_BAD,
+	ULHR_TOO_LONG,
+};
+
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns:
+ *
+ * - ULHR_OK on success
+ * - ULHR_BAD on error
+ * - ULHR_TOO_LONG if the header was too long
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes.
+ */
+enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+						    unsigned char *map,
+						    unsigned long mapsize,
+						    void *buffer,
+						    unsigned long bufsiz);
+
+int parse_loose_header(const char *hdr, struct object_info *oi);
+
 struct odb_transaction;
 
 /*
diff --git a/odb/source-files.c b/odb/source-files.c
index 8d6924755f..90806ddf86 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -67,7 +67,7 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out,
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
 	if (!packfile_store_read_object_stream(out, files->packed, oid) ||
-	    !odb_source_loose_read_object_stream(out, source, oid))
+	    !odb_source_read_object_stream(out, &files->loose->base, oid))
 		return 0;
 	return -1;
 }
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 50f387ecf3..4b82c6f316 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -1,11 +1,13 @@
 #include "git-compat-util.h"
 #include "abspath.h"
 #include "chdir-notify.h"
+#include "gettext.h"
 #include "loose.h"
 #include "object-file.h"
 #include "odb.h"
 #include "odb/source-files.h"
 #include "odb/source-loose.h"
+#include "odb/streaming.h"
 #include "oidtree.h"
 #include "strbuf.h"
 
@@ -30,6 +32,192 @@ static int odb_source_loose_read_object_info(struct odb_source *source,
 	return read_object_info_from_path(loose, buf.buf, oid, oi, flags);
 }
 
+/*
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
+ *
+ * The "path" out-parameter will give the path of the object we found (if any).
+ * Note that it may point to static storage and is only valid until another
+ * call to open_loose_object().
+ */
+static int open_loose_object(struct odb_source_loose *loose,
+			     const struct object_id *oid, const char **path)
+{
+	static struct strbuf buf = STRBUF_INIT;
+	int fd;
+
+	*path = odb_loose_path(&loose->base, &buf, oid);
+	fd = git_open(*path);
+	if (fd >= 0)
+		return fd;
+
+	return -1;
+}
+
+static void *odb_source_loose_map_object(struct odb_source_loose *loose,
+					 const struct object_id *oid,
+					 unsigned long *size)
+{
+	const char *p;
+	int fd = open_loose_object(loose, oid, &p);
+	void *map = NULL;
+	struct stat st;
+
+	if (fd < 0)
+		return NULL;
+
+	if (!fstat(fd, &st)) {
+		*size = xsize_t(st.st_size);
+		if (!*size) {
+			/* mmap() is forbidden on empty files */
+			error(_("object file %s is empty"), p);
+			goto out;
+		}
+
+		map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
+	}
+
+out:
+	close(fd);
+	return map;
+}
+
+struct odb_loose_read_stream {
+	struct odb_read_stream base;
+	git_zstream z;
+	enum {
+		ODB_LOOSE_READ_STREAM_INUSE,
+		ODB_LOOSE_READ_STREAM_DONE,
+		ODB_LOOSE_READ_STREAM_ERROR,
+	} z_state;
+	void *mapped;
+	unsigned long mapsize;
+	char hdr[32];
+	int hdr_avail;
+	int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+	struct odb_loose_read_stream *st =
+		container_of(_st, struct odb_loose_read_stream, base);
+	size_t total_read = 0;
+
+	switch (st->z_state) {
+	case ODB_LOOSE_READ_STREAM_DONE:
+		return 0;
+	case ODB_LOOSE_READ_STREAM_ERROR:
+		return -1;
+	default:
+		break;
+	}
+
+	if (st->hdr_used < st->hdr_avail) {
+		size_t to_copy = st->hdr_avail - st->hdr_used;
+		if (sz < to_copy)
+			to_copy = sz;
+		memcpy(buf, st->hdr + st->hdr_used, to_copy);
+		st->hdr_used += to_copy;
+		total_read += to_copy;
+	}
+
+	while (total_read < sz) {
+		int status;
+
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		status = git_inflate(&st->z, Z_FINISH);
+
+		total_read = st->z.next_out - (unsigned char *)buf;
+
+		if (status == Z_STREAM_END) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_DONE;
+			break;
+		}
+		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
+			return -1;
+		}
+	}
+	return total_read;
+}
+
+static int close_istream_loose(struct odb_read_stream *_st)
+{
+	struct odb_loose_read_stream *st =
+		container_of(_st, struct odb_loose_read_stream, base);
+
+	if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
+	munmap(st->mapped, st->mapsize);
+	return 0;
+}
+
+static int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+					       struct odb_source *source,
+					       const struct object_id *oid)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	struct object_info oi = OBJECT_INFO_INIT;
+	struct odb_loose_read_stream *st;
+	unsigned long mapsize;
+	unsigned long size_ul;
+	void *mapped;
+
+	mapped = odb_source_loose_map_object(loose, oid, &mapsize);
+	if (!mapped)
+		return -1;
+
+	/*
+	 * Note: we must allocate this structure early even though we may still
+	 * fail. This is because we need to initialize the zlib stream, and it
+	 * is not possible to copy the stream around after the fact because it
+	 * has self-referencing pointers.
+	 */
+	CALLOC_ARRAY(st, 1);
+
+	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
+				    sizeof(st->hdr))) {
+	case ULHR_OK:
+		break;
+	case ULHR_BAD:
+	case ULHR_TOO_LONG:
+		goto error;
+	}
+
+	/*
+	 * object_info.sizep is unsigned long* (32-bit on Windows), but
+	 * st->base.size is size_t (64-bit). Use temporary variable.
+	 * Note: loose objects >4GB would still truncate here, but such
+	 * large loose objects are uncommon (they'd normally be packed).
+	 */
+	oi.sizep = &size_ul;
+	oi.typep = &st->base.type;
+
+	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+		goto error;
+	st->base.size = size_ul;
+
+	st->mapped = mapped;
+	st->mapsize = mapsize;
+	st->hdr_used = strlen(st->hdr) + 1;
+	st->hdr_avail = st->z.total_out;
+	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
+	st->base.close = close_istream_loose;
+	st->base.read = read_istream_loose;
+
+	*out = &st->base;
+
+	return 0;
+error:
+	git_inflate_end(&st->z);
+	munmap(mapped, mapsize);
+	free(st);
+	return -1;
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -84,6 +272,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.close = odb_source_loose_close;
 	loose->base.reprepare = odb_source_loose_reprepare;
 	loose->base.read_object_info = odb_source_loose_read_object_info;
+	loose->base.read_object_stream = odb_source_loose_read_object_stream;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 06/18] odb/source-loose: wire up `read_object_info()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_read_object_info()` from "object-file.c" into
"odb/source-loose.c" and wire it up as the `read_object_info()` callback
of the loose source. Callers that previously invoked it directly now go
through the generic `odb_source_read_object_info()` interface instead.

The function `read_object_info_from_path()` cannot be moved along with
it because it is still called by `for_each_object_wrapper_cb()`. It is
therefore kept in place, but adjusted to take a loose source to clarify
that it's always operating on this structure.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 46 +++++++++++++---------------------------------
 object-file.h      | 11 ++++++-----
 odb/source-files.c |  2 +-
 odb/source-loose.c | 24 ++++++++++++++++++++++++
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/object-file.c b/object-file.c
index 0f4f1e7bdc..fa174512a4 100644
--- a/object-file.c
+++ b/object-file.c
@@ -396,13 +396,12 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
 	return 0;
 }
 
-static int read_object_info_from_path(struct odb_source *source,
-				      const char *path,
-				      const struct object_id *oid,
-				      struct object_info *oi,
-				      enum object_info_flags flags)
+int read_object_info_from_path(struct odb_source_loose *loose,
+			       const char *path,
+			       const struct object_id *oid,
+			       struct object_info *oi,
+			       enum object_info_flags flags)
 {
-	struct odb_source_files *files = odb_source_files_downcast(source);
 	int ret;
 	int fd;
 	unsigned long mapsize;
@@ -425,7 +424,7 @@ static int read_object_info_from_path(struct odb_source *source,
 		struct stat st;
 
 		if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
-			ret = quick_has_loose(files->loose, oid) ? 0 : -1;
+			ret = quick_has_loose(loose, oid) ? 0 : -1;
 			goto out;
 		}
 
@@ -532,7 +531,7 @@ static int read_object_info_from_path(struct odb_source *source,
 		if (oi->typep == &type_scratch)
 			oi->typep = NULL;
 		if (oi->delta_base_oid)
-			oidclr(oi->delta_base_oid, source->odb->repo->hash_algo);
+			oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo);
 		if (!ret)
 			oi->whence = OI_LOOSE;
 	}
@@ -540,26 +539,6 @@ static int read_object_info_from_path(struct odb_source *source,
 	return ret;
 }
 
-int odb_source_loose_read_object_info(struct odb_source *source,
-				      const struct object_id *oid,
-				      struct object_info *oi,
-				      enum object_info_flags flags)
-{
-	static struct strbuf buf = STRBUF_INIT;
-
-	/*
-	 * The second read shouldn't cause new loose objects to show up, unless
-	 * there was a race condition with a secondary process. We don't care
-	 * about this case though, so we simply skip reading loose objects a
-	 * second time.
-	 */
-	if (flags & OBJECT_INFO_SECOND_READ)
-		return -1;
-
-	odb_loose_path(source, &buf, oid);
-	return read_object_info_from_path(source, buf.buf, oid, oi, flags);
-}
-
 static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
 			     const void *buf, unsigned long len,
 			     struct object_id *oid,
@@ -1833,7 +1812,7 @@ int for_each_loose_file_in_source(struct odb_source *source,
 }
 
 struct for_each_object_wrapper_data {
-	struct odb_source *source;
+	struct odb_source_loose *loose;
 	const struct object_info *request;
 	odb_for_each_object_cb cb;
 	void *cb_data;
@@ -1848,7 +1827,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid,
 	if (data->request) {
 		struct object_info oi = *data->request;
 
-		if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0)
+		if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0)
 			return -1;
 
 		return data->cb(oid, &oi, data->cb_data);
@@ -1865,8 +1844,8 @@ static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
 	if (data->request) {
 		struct object_info oi = *data->request;
 
-		if (odb_source_loose_read_object_info(data->source,
-						      oid, &oi, 0) < 0)
+		if (odb_source_read_object_info(&data->loose->base,
+						oid, &oi, 0) < 0)
 			return -1;
 
 		return data->cb(oid, &oi, data->cb_data);
@@ -1881,8 +1860,9 @@ int odb_source_loose_for_each_object(struct odb_source *source,
 				     void *cb_data,
 				     const struct odb_for_each_object_options *opts)
 {
+	struct odb_source_files *files = odb_source_files_downcast(source);
 	struct for_each_object_wrapper_data data = {
-		.source = source,
+		.loose = files->loose,
 		.request = request,
 		.cb = cb,
 		.cb_data = cb_data,
diff --git a/object-file.h b/object-file.h
index 420a0fff2e..8ac2832dac 100644
--- a/object-file.h
+++ b/object-file.h
@@ -21,11 +21,6 @@ struct object_info;
 struct odb_read_stream;
 struct odb_source;
 
-int odb_source_loose_read_object_info(struct odb_source *source,
-				      const struct object_id *oid,
-				      struct object_info *oi,
-				      enum object_info_flags flags);
-
 int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 					struct odb_source *source,
 					const struct object_id *oid);
@@ -198,6 +193,12 @@ int read_loose_object(struct repository *repo,
 		      void **contents,
 		      struct object_info *oi);
 
+int read_object_info_from_path(struct odb_source_loose *loose,
+			       const char *path,
+			       const struct object_id *oid,
+			       struct object_info *oi,
+			       enum object_info_flags flags);
+
 struct odb_transaction;
 
 /*
diff --git a/odb/source-files.c b/odb/source-files.c
index 59e3a70d80..8d6924755f 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -55,7 +55,7 @@ static int odb_source_files_read_object_info(struct odb_source *source,
 	struct odb_source_files *files = odb_source_files_downcast(source);
 
 	if (!packfile_store_read_object_info(files->packed, oid, oi, flags) ||
-	    !odb_source_loose_read_object_info(source, oid, oi, flags))
+	    !odb_source_read_object_info(&files->loose->base, oid, oi, flags))
 		return 0;
 
 	return -1;
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 65c1076659..50f387ecf3 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -2,10 +2,33 @@
 #include "abspath.h"
 #include "chdir-notify.h"
 #include "loose.h"
+#include "object-file.h"
 #include "odb.h"
 #include "odb/source-files.h"
 #include "odb/source-loose.h"
 #include "oidtree.h"
+#include "strbuf.h"
+
+static int odb_source_loose_read_object_info(struct odb_source *source,
+					     const struct object_id *oid,
+					     struct object_info *oi,
+					     enum object_info_flags flags)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	static struct strbuf buf = STRBUF_INIT;
+
+	/*
+	 * The second read shouldn't cause new loose objects to show up, unless
+	 * there was a race condition with a secondary process. We don't care
+	 * about this case though, so we simply skip reading loose objects a
+	 * second time.
+	 */
+	if (flags & OBJECT_INFO_SECOND_READ)
+		return -1;
+
+	odb_loose_path(source, &buf, oid);
+	return read_object_info_from_path(loose, buf.buf, oid, oi, flags);
+}
 
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
@@ -60,6 +83,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.free = odb_source_loose_free;
 	loose->base.close = odb_source_loose_close;
 	loose->base.reprepare = odb_source_loose_reprepare;
+	loose->base.read_object_info = odb_source_loose_read_object_info;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 05/18] odb/source-loose: wire up `close()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Wire up a new `close()` callback for the loose source and call it from
the "files" source via the generic `odb_source_close()` interface. The
callback itself is a no-op as the loose source has no resources that
need to be released on close.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-files.c | 1 +
 odb/source-loose.c | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/odb/source-files.c b/odb/source-files.c
index 10832e81e4..59e3a70d80 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -36,6 +36,7 @@ static void odb_source_files_free(struct odb_source *source)
 static void odb_source_files_close(struct odb_source *source)
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
+	odb_source_close(&files->loose->base);
 	packfile_store_close(files->packed);
 }
 
diff --git a/odb/source-loose.c b/odb/source-loose.c
index e0fe0d513d..65c1076659 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -21,6 +21,11 @@ static void odb_source_loose_reprepare(struct odb_source *source)
 	odb_source_loose_clear_cache(loose);
 }
 
+static void odb_source_loose_close(struct odb_source *source UNUSED)
+{
+	/* Nothing to do. */
+}
+
 static void odb_source_loose_reparent(const char *name UNUSED,
 				      const char *old_cwd,
 				      const char *new_cwd,
@@ -53,6 +58,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->files = files;
 
 	loose->base.free = odb_source_loose_free;
+	loose->base.close = odb_source_loose_close;
 	loose->base.reprepare = odb_source_loose_reprepare;
 
 	if (!is_absolute_path(loose->base.path))

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 04/18] odb/source-loose: wire up `reprepare()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_reprepare()` from "object-file.c" into
"odb/source-loose.c" and wire it up as the `reprepare()` callback of the
loose source.

While at it, make `odb_source_loose_clear_cache()` static, as it is no
longer needed outside of its file.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 6 ------
 object-file.h      | 3 ---
 odb/source-files.c | 2 +-
 odb/source-loose.c | 9 ++++++++-
 odb/source-loose.h | 2 --
 5 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/object-file.c b/object-file.c
index 977d959d33..0f4f1e7bdc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2041,12 +2041,6 @@ static struct oidtree *odb_source_loose_cache(struct odb_source *source,
 	return files->loose->cache;
 }
 
-void odb_source_loose_reprepare(struct odb_source *source)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	odb_source_loose_clear_cache(files->loose);
-}
-
 static int check_stream_oid(git_zstream *stream,
 			    const char *hdr,
 			    unsigned long size,
diff --git a/object-file.h b/object-file.h
index 02c9680980..420a0fff2e 100644
--- a/object-file.h
+++ b/object-file.h
@@ -21,9 +21,6 @@ struct object_info;
 struct odb_read_stream;
 struct odb_source;
 
-/* Reprepare the loose source by emptying the loose object cache. */
-void odb_source_loose_reprepare(struct odb_source *source);
-
 int odb_source_loose_read_object_info(struct odb_source *source,
 				      const struct object_id *oid,
 				      struct object_info *oi,
diff --git a/odb/source-files.c b/odb/source-files.c
index ccc637311b..10832e81e4 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -42,7 +42,7 @@ static void odb_source_files_close(struct odb_source *source)
 static void odb_source_files_reprepare(struct odb_source *source)
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
-	odb_source_loose_reprepare(&files->base);
+	odb_source_reprepare(&files->loose->base);
 	packfile_store_reprepare(files->packed);
 }
 
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 92e18f5adb..e0fe0d513d 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -7,7 +7,7 @@
 #include "odb/source-loose.h"
 #include "oidtree.h"
 
-void odb_source_loose_clear_cache(struct odb_source_loose *loose)
+static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
 	FREE_AND_NULL(loose->cache);
@@ -15,6 +15,12 @@ void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 	       sizeof(loose->subdir_seen));
 }
 
+static void odb_source_loose_reprepare(struct odb_source *source)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	odb_source_loose_clear_cache(loose);
+}
+
 static void odb_source_loose_reparent(const char *name UNUSED,
 				      const char *old_cwd,
 				      const char *new_cwd,
@@ -47,6 +53,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->files = files;
 
 	loose->base.free = odb_source_loose_free;
+	loose->base.reprepare = odb_source_loose_reprepare;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);
diff --git a/odb/source-loose.h b/odb/source-loose.h
index bd989f0728..4dd4fd6ce3 100644
--- a/odb/source-loose.h
+++ b/odb/source-loose.h
@@ -44,6 +44,4 @@ static inline struct odb_source_loose *odb_source_loose_downcast(struct odb_sour
 	return container_of(source, struct odb_source_loose, base);
 }
 
-void odb_source_loose_clear_cache(struct odb_source_loose *loose);
-
 #endif

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 03/18] odb/source-loose: start converting to a proper `struct odb_source`
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Start converting `struct odb_source_loose` into a proper pluggable
`struct odb_source` by embedding the base struct and assigning it the
new `ODB_SOURCE_LOOSE` type. Furthermore, wire up lifecycle management
of this source by implementing the `free` callback and taking ownership
of the chdir notifications.

Note that the loose source is not yet functional as a standalone `struct
odb_source`, as it's missing all of the callback implementations other
than `free`. These will be wired up in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 17 -----------------
 object-file.h      |  2 --
 odb/source-files.c |  2 +-
 odb/source-loose.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 odb/source-loose.h | 14 ++++++++++++++
 odb/source.h       |  3 +++
 6 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/object-file.c b/object-file.c
index 7a1908bfc0..977d959d33 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2041,14 +2041,6 @@ static struct oidtree *odb_source_loose_cache(struct odb_source *source,
 	return files->loose->cache;
 }
 
-static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
-{
-	oidtree_clear(loose->cache);
-	FREE_AND_NULL(loose->cache);
-	memset(&loose->subdir_seen, 0,
-	       sizeof(loose->subdir_seen));
-}
-
 void odb_source_loose_reprepare(struct odb_source *source)
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
@@ -2205,15 +2197,6 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
 	return &transaction->base;
 }
 
-void odb_source_loose_free(struct odb_source_loose *loose)
-{
-	if (!loose)
-		return;
-	odb_source_loose_clear_cache(loose);
-	loose_object_map_clear(&loose->map);
-	free(loose);
-}
-
 struct odb_loose_read_stream {
 	struct odb_read_stream base;
 	git_zstream z;
diff --git a/object-file.h b/object-file.h
index 1d8312cf7f..02c9680980 100644
--- a/object-file.h
+++ b/object-file.h
@@ -21,8 +21,6 @@ struct object_info;
 struct odb_read_stream;
 struct odb_source;
 
-void odb_source_loose_free(struct odb_source_loose *loose);
-
 /* Reprepare the loose source by emptying the loose object cache. */
 void odb_source_loose_reprepare(struct odb_source *source);
 
diff --git a/odb/source-files.c b/odb/source-files.c
index 185cc6903e..ccc637311b 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -27,7 +27,7 @@ static void odb_source_files_free(struct odb_source *source)
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
 	chdir_notify_unregister(NULL, odb_source_files_reparent, files);
-	odb_source_loose_free(files->loose);
+	odb_source_free(&files->loose->base);
 	packfile_store_free(files->packed);
 	odb_source_release(&files->base);
 	free(files);
diff --git a/odb/source-loose.c b/odb/source-loose.c
index c9e7414814..92e18f5adb 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -1,10 +1,55 @@
 #include "git-compat-util.h"
+#include "abspath.h"
+#include "chdir-notify.h"
+#include "loose.h"
+#include "odb.h"
+#include "odb/source-files.h"
 #include "odb/source-loose.h"
+#include "oidtree.h"
+
+void odb_source_loose_clear_cache(struct odb_source_loose *loose)
+{
+	oidtree_clear(loose->cache);
+	FREE_AND_NULL(loose->cache);
+	memset(&loose->subdir_seen, 0,
+	       sizeof(loose->subdir_seen));
+}
+
+static void odb_source_loose_reparent(const char *name UNUSED,
+				      const char *old_cwd,
+				      const char *new_cwd,
+				      void *cb_data)
+{
+	struct odb_source_loose *loose = cb_data;
+	char *path = reparent_relative_path(old_cwd, new_cwd,
+					    loose->base.path);
+	free(loose->base.path);
+	loose->base.path = path;
+}
+
+static void odb_source_loose_free(struct odb_source *source)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	odb_source_loose_clear_cache(loose);
+	loose_object_map_clear(&loose->map);
+	chdir_notify_unregister(NULL, odb_source_loose_reparent, loose);
+	odb_source_release(&loose->base);
+	free(loose);
+}
 
 struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 {
 	struct odb_source_loose *loose;
+
 	CALLOC_ARRAY(loose, 1);
+	odb_source_init(&loose->base, files->base.odb, ODB_SOURCE_LOOSE,
+			files->base.path, files->base.local);
 	loose->files = files;
+
+	loose->base.free = odb_source_loose_free;
+
+	if (!is_absolute_path(loose->base.path))
+		chdir_notify_register(NULL, odb_source_loose_reparent, loose);
+
 	return loose;
 }
diff --git a/odb/source-loose.h b/odb/source-loose.h
index bf61e767c8..bd989f0728 100644
--- a/odb/source-loose.h
+++ b/odb/source-loose.h
@@ -12,6 +12,7 @@ struct oidtree;
  * file per object. This source is part of the files source.
  */
 struct odb_source_loose {
+	struct odb_source base;
 	struct odb_source_files *files;
 
 	/*
@@ -32,4 +33,17 @@ struct odb_source_loose {
 
 struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files);
 
+/*
+ * Cast the given object database source to the loose backend. This will cause
+ * a BUG in case the source doesn't use this backend.
+ */
+static inline struct odb_source_loose *odb_source_loose_downcast(struct odb_source *source)
+{
+	if (source->type != ODB_SOURCE_LOOSE)
+		BUG("trying to downcast source of type '%d' to loose", source->type);
+	return container_of(source, struct odb_source_loose, base);
+}
+
+void odb_source_loose_clear_cache(struct odb_source_loose *loose);
+
 #endif
diff --git a/odb/source.h b/odb/source.h
index 0a440884e4..8bcb67787e 100644
--- a/odb/source.h
+++ b/odb/source.h
@@ -14,6 +14,9 @@ enum odb_source_type {
 	/* The "files" backend that uses loose objects and packfiles. */
 	ODB_SOURCE_FILES,
 
+	/* The "loose" backend that uses loose objects, only. */
+	ODB_SOURCE_LOOSE,
+
 	/* The "in-memory" backend that stores objects in memory. */
 	ODB_SOURCE_INMEMORY,
 };

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 02/18] odb/source-loose: store pointer to "files" instead of generic source
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

The `struct odb_source_loose` holds a pointer to its owning parent
source. The way that Git is currently structured, this parent is always
the "files" source. In subsequent commits we're going to detangle that
so that the "loose" source doesn't have any owning parent source at all
and can be used as a completely standalone source.

Detangling this mess is somewhat intricate though, and is made even more
intricate because it's not always clear which kind of source one is
holding at a specific point in time -- either the parent "files" source,
or the child "loose" source.

Make this relationship more explicit by storing a pointer to the "files"
source instead of storing a pointer to a generic `struct odb_source`.
This will help make subsequent steps a bit clearer.

Note that this is a temporary step, only. At the end of this series
we will have dropped the parent pointer completely.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 4 ++--
 odb/source-files.c | 2 +-
 odb/source-loose.c | 4 ++--
 odb/source-loose.h | 5 +++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/object-file.c b/object-file.c
index 641bd9c079..7a1908bfc0 100644
--- a/object-file.c
+++ b/object-file.c
@@ -178,7 +178,7 @@ static int open_loose_object(struct odb_source_loose *loose,
 	static struct strbuf buf = STRBUF_INIT;
 	int fd;
 
-	*path = odb_loose_path(loose->source, &buf, oid);
+	*path = odb_loose_path(&loose->files->base, &buf, oid);
 	fd = git_open(*path);
 	if (fd >= 0)
 		return fd;
@@ -189,7 +189,7 @@ static int open_loose_object(struct odb_source_loose *loose,
 static int quick_has_loose(struct odb_source_loose *loose,
 			   const struct object_id *oid)
 {
-	return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid);
+	return !!oidtree_contains(odb_source_loose_cache(&loose->files->base, oid), oid);
 }
 
 /*
diff --git a/odb/source-files.c b/odb/source-files.c
index b5abd20e97..185cc6903e 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -264,7 +264,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
 
 	CALLOC_ARRAY(files, 1);
 	odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local);
-	files->loose = odb_source_loose_new(&files->base);
+	files->loose = odb_source_loose_new(files);
 	files->packed = packfile_store_new(&files->base);
 
 	files->base.free = odb_source_files_free;
diff --git a/odb/source-loose.c b/odb/source-loose.c
index b944d21813..c9e7414814 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -1,10 +1,10 @@
 #include "git-compat-util.h"
 #include "odb/source-loose.h"
 
-struct odb_source_loose *odb_source_loose_new(struct odb_source *source)
+struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 {
 	struct odb_source_loose *loose;
 	CALLOC_ARRAY(loose, 1);
-	loose->source = source;
+	loose->files = files;
 	return loose;
 }
diff --git a/odb/source-loose.h b/odb/source-loose.h
index 8b4bac77ea..bf61e767c8 100644
--- a/odb/source-loose.h
+++ b/odb/source-loose.h
@@ -3,6 +3,7 @@
 
 #include "odb/source.h"
 
+struct odb_source_files;
 struct object_database;
 struct oidtree;
 
@@ -11,7 +12,7 @@ struct oidtree;
  * file per object. This source is part of the files source.
  */
 struct odb_source_loose {
-	struct odb_source *source;
+	struct odb_source_files *files;
 
 	/*
 	 * Used to store the results of readdir(3) calls when we are OK
@@ -29,6 +30,6 @@ struct odb_source_loose {
 	struct loose_object_map *map;
 };
 
-struct odb_source_loose *odb_source_loose_new(struct odb_source *source);
+struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files);
 
 #endif

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 01/18] odb/source-loose: move loose source into "odb/" subsystem
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

In subsequent patches we'll be turning `struct odb_source_loose` into a
proper `struct odb_source`. As a first step towards this goal, move its
struct out of "object-file.c" and into "odb/source-loose.c".

This detaches the implementation of the loose object source from the
generic object file code, following the same convention already used by
the "files" and "in-memory" sources.

No functional changes are intended.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Makefile           |  1 +
 meson.build        |  1 +
 object-file.c      |  8 --------
 object-file.h      | 21 +--------------------
 odb/source-loose.c | 10 ++++++++++
 odb/source-loose.h | 34 ++++++++++++++++++++++++++++++++++
 6 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/Makefile b/Makefile
index a43b8ee067..01356235c3 100644
--- a/Makefile
+++ b/Makefile
@@ -1217,6 +1217,7 @@ LIB_OBJS += odb.o
 LIB_OBJS += odb/source.o
 LIB_OBJS += odb/source-files.o
 LIB_OBJS += odb/source-inmemory.o
+LIB_OBJS += odb/source-loose.o
 LIB_OBJS += odb/streaming.o
 LIB_OBJS += odb/transaction.o
 LIB_OBJS += oid-array.o
diff --git a/meson.build b/meson.build
index 664d831329..c85e598835 100644
--- a/meson.build
+++ b/meson.build
@@ -405,6 +405,7 @@ libgit_sources = [
   'odb/source.c',
   'odb/source-files.c',
   'odb/source-inmemory.c',
+  'odb/source-loose.c',
   'odb/streaming.c',
   'odb/transaction.c',
   'oid-array.c',
diff --git a/object-file.c b/object-file.c
index 90f995d000..641bd9c079 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2205,14 +2205,6 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source)
 	return &transaction->base;
 }
 
-struct odb_source_loose *odb_source_loose_new(struct odb_source *source)
-{
-	struct odb_source_loose *loose;
-	CALLOC_ARRAY(loose, 1);
-	loose->source = source;
-	return loose;
-}
-
 void odb_source_loose_free(struct odb_source_loose *loose)
 {
 	if (!loose)
diff --git a/object-file.h b/object-file.h
index 5241b8dd5c..1d8312cf7f 100644
--- a/object-file.h
+++ b/object-file.h
@@ -4,6 +4,7 @@
 #include "git-zlib.h"
 #include "object.h"
 #include "odb.h"
+#include "odb/source-loose.h"
 
 struct index_state;
 
@@ -20,26 +21,6 @@ struct object_info;
 struct odb_read_stream;
 struct odb_source;
 
-struct odb_source_loose {
-	struct odb_source *source;
-
-	/*
-	 * Used to store the results of readdir(3) calls when we are OK
-	 * sacrificing accuracy due to races for speed. That includes
-	 * object existence with OBJECT_INFO_QUICK, as well as
-	 * our search for unique abbreviated hashes. Don't use it for tasks
-	 * requiring greater accuracy!
-	 *
-	 * Be sure to call odb_load_loose_cache() before using.
-	 */
-	uint32_t subdir_seen[8]; /* 256 bits */
-	struct oidtree *cache;
-
-	/* Map between object IDs for loose objects. */
-	struct loose_object_map *map;
-};
-
-struct odb_source_loose *odb_source_loose_new(struct odb_source *source);
 void odb_source_loose_free(struct odb_source_loose *loose);
 
 /* Reprepare the loose source by emptying the loose object cache. */
diff --git a/odb/source-loose.c b/odb/source-loose.c
new file mode 100644
index 0000000000..b944d21813
--- /dev/null
+++ b/odb/source-loose.c
@@ -0,0 +1,10 @@
+#include "git-compat-util.h"
+#include "odb/source-loose.h"
+
+struct odb_source_loose *odb_source_loose_new(struct odb_source *source)
+{
+	struct odb_source_loose *loose;
+	CALLOC_ARRAY(loose, 1);
+	loose->source = source;
+	return loose;
+}
diff --git a/odb/source-loose.h b/odb/source-loose.h
new file mode 100644
index 0000000000..8b4bac77ea
--- /dev/null
+++ b/odb/source-loose.h
@@ -0,0 +1,34 @@
+#ifndef ODB_SOURCE_LOOSE_H
+#define ODB_SOURCE_LOOSE_H
+
+#include "odb/source.h"
+
+struct object_database;
+struct oidtree;
+
+/*
+ * An object database source that stores its objects in loose format, one
+ * file per object. This source is part of the files source.
+ */
+struct odb_source_loose {
+	struct odb_source *source;
+
+	/*
+	 * Used to store the results of readdir(3) calls when we are OK
+	 * sacrificing accuracy due to races for speed. That includes
+	 * object existence with OBJECT_INFO_QUICK, as well as
+	 * our search for unique abbreviated hashes. Don't use it for tasks
+	 * requiring greater accuracy!
+	 *
+	 * Be sure to call odb_load_loose_cache() before using.
+	 */
+	uint32_t subdir_seen[8]; /* 256 bits */
+	struct oidtree *cache;
+
+	/* Map between object IDs for loose objects. */
+	struct loose_object_map *map;
+};
+
+struct odb_source_loose *odb_source_loose_new(struct odb_source *source);
+
+#endif

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 00/18] odb: make loose object source a proper `struct odb_source`
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260521-b4-pks-odb-source-loose-v1-0-6553b399be2d@pks.im>

Hi,

this patch series converts the loose object source into a proper `struct
odb_source` so that it can be used via our generic interfaces.

The patch series is relatively straightforward, as the source basically
already exists as such and the interfaces already match. So for the most
part we are just moving around some code and converting functions that
were previously called directly into callbacks.

I guess the only part that needs some attention is that there is some
confusion at first with the `struct odb_source_loose::source` parent
pointer that initially points at the owning `struct odb_source_files`.
This relationship doesn't make much sense, as a loose source can totally
exist standalone without the files source.

We're thus getting rid of this relationship in this series, too. I found
it quite hard to reason about which pointer one is holding at any point
in time though, doubly so because the parent pointer was named "source",
which is rather generic. The second commit thus renames the pointer to
`files` and converts it into `struct odb_source_files` to make the
transition cleaner, but the whole pointer will be dropped at the end of
this series.

The series is built on top of aec3f58750 (Sync with 'maint', 2026-05-21)
with ps/odb-in-memory at d2902a4549 (t/unit-tests: add tests for the
in-memory object source, 2026-04-10) merged into it.

Changes in v2:
  - Some smaller typo fixes.
  - Link to v1: https://patch.msgid.link/20260521-b4-pks-odb-source-loose-v1-0-6553b399be2d@pks.im

Thanks!

Patrick

---
Patrick Steinhardt (18):
      odb/source-loose: move loose source into "odb/" subsystem
      odb/source-loose: store pointer to "files" instead of generic source
      odb/source-loose: start converting to a proper `struct odb_source`
      odb/source-loose: wire up `reprepare()` callback
      odb/source-loose: wire up `close()` callback
      odb/source-loose: wire up `read_object_info()` callback
      odb/source-loose: wire up `read_object_stream()` callback
      odb/source-loose: wire up `for_each_object()` callback
      odb/source-loose: wire up `find_abbrev_len()` callback
      odb/source-loose: wire up `count_objects()` callback
      odb/source-loose: drop `odb_source_loose_has_object()`
      odb/source-loose: wire up `freshen_object()` callback
      loose: refactor object map to operate on `struct odb_source_loose`
      odb/source-loose: wire up `write_object()` callback
      object-file: refactor writing objects to use loose source
      odb/source-loose: wire up `write_object_stream()` callback
      odb/source-loose: stub out remaining callbacks
      odb/source-loose: drop pointer to the "files" source

 Makefile               |   1 +
 builtin/cat-file.c     |   5 +-
 builtin/gc.c           |   6 +-
 builtin/pack-objects.c |  12 +-
 http-walker.c          |   3 +-
 http.c                 |   6 +-
 loose.c                |  45 ++-
 loose.h                |   4 +-
 meson.build            |   1 +
 object-file.c          | 796 ++++---------------------------------------------
 object-file.h          | 149 ++++-----
 odb/source-files.c     |  28 +-
 odb/source-loose.c     | 736 +++++++++++++++++++++++++++++++++++++++++++++
 odb/source-loose.h     |  48 +++
 odb/source.h           |   3 +
 15 files changed, 973 insertions(+), 870 deletions(-)

Range-diff versus v1:

 1:  f25aaf0889 =  1:  7c97c1687c odb/source-loose: move loose source into "odb/" subsystem
 2:  0bfebeb0da =  2:  1e1e267b39 odb/source-loose: store pointer to "files" instead of generic source
 3:  35787e6ca6 !  3:  847cb523ee odb/source-loose: start converting to a proper `struct odb_source`
    @@ odb/source-loose.h: struct odb_source_loose {
      
     +/*
     + * Cast the given object database source to the loose backend. This will cause
    -+ * a BUG in case the source uses doesn't use this backend.
    ++ * a BUG in case the source doesn't use this backend.
     + */
     +static inline struct odb_source_loose *odb_source_loose_downcast(struct odb_source *source)
     +{
 4:  392962c177 =  4:  af543598ee odb/source-loose: wire up `reprepare()` callback
 5:  b4102668c3 =  5:  884f573f89 odb/source-loose: wire up `close()` callback
 6:  63da6e4abb =  6:  de85ffb4a9 odb/source-loose: wire up `read_object_info()` callback
 7:  12b0c5c32d =  7:  522aaa9c3d odb/source-loose: wire up `read_object_stream()` callback
 8:  8df176e282 =  8:  75cf3f4428 odb/source-loose: wire up `for_each_object()` callback
 9:  6199ae90e0 =  9:  87c1c9ae5e odb/source-loose: wire up `find_abbrev_len()` callback
10:  d0b1ef48d4 = 10:  f6405c8070 odb/source-loose: wire up `count_objects()` callback
11:  0476d8b0c4 = 11:  0e8d6b6487 odb/source-loose: drop `odb_source_loose_has_object()`
12:  27bb7b0724 = 12:  58cc626dd1 odb/source-loose: wire up `freshen_object()` callback
13:  f8ce6a169d = 13:  51a22e7400 loose: refactor object map to operate on `struct odb_source_loose`
14:  7ab570b776 = 14:  a9a88d6200 odb/source-loose: wire up `write_object()` callback
15:  8c9240aaa0 = 15:  9236d2fd26 object-file: refactor writing objects to use loose source
16:  de69621fa1 ! 16:  6316efb890 odb/source-loose: wire up `write_object_stream()` callback
    @@ object-file.h: int index_path(struct index_state *istate, struct object_id *oid,
      
     -int odb_source_loose_write_stream(struct odb_source_loose *loose,
     +/*
    -+ * Write the given stream into the loose object source. The only difference to
    -+ * the generic implementation of this function is that we don't perform an
    ++ * Write the given stream into the loose object source. The only difference
    ++ * from the generic implementation of this function is that we don't perform an
     + * object existence check here.
     + *
     + * TODO: We should stop exposing this function altogether and move it into
17:  f2d45e1a56 = 17:  789ec50474 odb/source-loose: stub out remaining callbacks
18:  070052fc22 = 18:  0a64d23377 odb/source-loose: drop pointer to the "files" source

---
base-commit: 072edab49f312c80561b2899f03f361f74fc38e4
change-id: 20260413-b4-pks-odb-source-loose-4900c8ca91db


^ permalink raw reply

* [PATCH v2] prio-queue: use cascade-down for faster extract-min
From: Kristofer Karlsson via GitGitGadget @ 2026-06-01  8:17 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Kristofer Karlsson
In-Reply-To: <pull.2132.git.1780250236304.gitgitgadget@gmail.com>

From: Kristofer Karlsson <krka@spotify.com>

Add sift_up_rebalance(), an alternative to sift_down_root() that
halves the number of comparisons per extract-min.

The standard extract places the last array element at the root and
sifts it down.  At each level this requires two comparisons (left
vs right child, then element vs winner) and a swap.

sift_up_rebalance() instead promotes the smaller child into the
root slot at each level — one comparison and one copy — until the
vacancy reaches a leaf.  The last array element is placed at the
vacancy and sifted up to restore heap order.  In practice the
sift-up rarely moves more than a level or two because the last
array element tends to be large.

Work per extract drops from 2d comparisons + d swaps to
d comparisons + d copies + a short sift-up.

prio_queue_get() now calls sift_up_rebalance() instead of placing
the last element at root and calling sift_down_root().

sift_down_root() and prio_queue_replace() are left unchanged.

Synthetic benchmark (10 rounds of 10M put+get cycles, CPU-pinned,
same compiler and Makefile flags):

Ascending keys (git's typical pattern — parents have lower
priority than children):

  queue width  baseline  patched  speedup
           10     4.39s    3.91s    1.12x
          100     9.10s    6.61s    1.38x
        1,000    11.84s    9.25s    1.28x
       10,000    17.50s   13.92s    1.26x
      100,000    23.97s   20.19s    1.19x

Descending keys (worst case — last element always sinks to leaf):

  queue width  baseline  patched  speedup
           10     4.94s    4.95s    1.00x
          100     9.75s    9.42s    1.03x
        1,000    15.01s   15.29s    0.98x
       10,000    24.79s   23.88s    1.04x
      100,000    29.69s   28.24s    1.05x

Random keys:

  queue width  baseline  patched  speedup
           10     5.05s    4.99s    1.01x
          100     9.90s    9.50s    1.04x
        1,000    15.35s   14.77s    1.04x
       10,000    25.35s   24.21s    1.05x
      100,000    65.71s   63.38s    1.04x

No significant regressions in any scenario; the worst result is 0.98x
(descending keys, queue width 1,000).

End-to-end benchmark on the linux kernel repo (1.4M commits,
range v5.0..v6.0, 311K commits, 20 interleaved runs, 1 warmup):

  Command                      baseline  patched  speedup
  rev-list --count v5.0..v6.0    484ms     474ms    1.02x

The improvement scales with DAG width: wider DAGs produce larger
priority queues, amplifying the per-level savings.  In small or
narrow repositories the queues stay shallow and the sift-down
cost is already negligible.

Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
    prio-queue: use cascade-down sift for faster extract-min
    
    This is a small optimization to prio_queue_get() that reduces the number
    of comparisons per extract-min from 2d to d (where d is the sift
    distance).
    
    The standard extract places the last array element at the root and sifts
    it down, comparing against both children at each level. The new
    sift_up_rebalance() instead promotes the smaller child at each level
    (one comparison and one copy) leaving a vacancy that sinks to a leaf.
    The last element is placed there and sifted up, which in practice rarely
    moves more than a level or two.
    
    The improvement shows clearly in synthetic benchmarks (up to 1.38x for
    ascending keys at queue width 100) but is modest end-to-end since
    sift_down_root is only a fraction of total runtime. On the linux kernel
    repo, rev-list --count v5.0..v6.0 improves by ~2%. The effect scales
    with DAG width.
    
    Changes since v1:
    
     * Kept sift_down_root() and prio_queue_replace() completely unchanged,
       preserving René's optimization that avoids the get+put overhead for
       replace. The cascade approach now only applies to prio_queue_get().
    
     * Extracted the new logic into a separate sift_up_rebalance() function
       rather than inlining it in prio_queue_get().
    
     * Updated benchmark numbers for ascending, descending and random
       insertion ordering. No regressions in any scenario.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2132%2Fspkrka%2Fcascade-sift-down-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2132/spkrka/cascade-sift-down-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/2132

Range-diff vs v1:

 1:  9ca2fab4dc ! 1:  6051d44e59 prio-queue: use cascade-down sift for faster extract-min
     @@ Metadata
      Author: Kristofer Karlsson <krka@spotify.com>
      
       ## Commit message ##
     -    prio-queue: use cascade-down sift for faster extract-min
     -
     -    Replace the standard sift-down in prio_queue_get() with a
     -    cascade-down approach.
     -
     -    The standard approach places the last array element at the root,
     -    then sifts it down.  At each level this requires two comparisons
     -    (left vs right child, then element vs winner) and, when the
     -    element is larger, a swap (three 16-byte copies).
     -
     -    The cascade approach instead promotes the smaller child into the
     -    vacant root slot at each level — one comparison and one copy.
     -    The vacancy sinks to a leaf, where the last array element is
     -    placed and sifted up if needed — typically zero levels since the
     -    last array element tends to be large.
     -
     -    In the common case, work per extract drops from 2d comparisons
     -    + 3d copies to d comparisons + d copies: roughly half the
     -    comparisons and a third of the data movement.  The sift-up phase
     -    can add work when the last element is smaller than ancestors of
     -    the leaf vacancy, but this is rare in practice.
     -
     -    Simplify prio_queue_replace() to a plain get+put sequence.  This
     -    is semantically equivalent: the old implementation wrote to slot 0
     -    and sifted down, which has the same observable effect as removing
     -    the root and inserting a new element.  No caller observes queue
     -    state between the two operations.  The previous implementation
     -    shared sift_down_root() with get, but the cascade approach no
     -    longer accommodates that cleanly since sift_down_root() now
     -    expects the element to reinsert at queue->array[queue->nr], left
     -    there by prio_queue_get() after decrementing nr.  This is fine in
     -    practice: replace is only called from pop_most_recent_commit()
     -    (fetch-pack, object-name, walker) and show-branch — none of
     -    which appear in any hot path.
     -
     -    A synthetic benchmark (10 rounds of 10M put+get cycles, ascending
     -    integer keys, CPU-pinned, median of 3 runs, same compiler and
     -    Makefile flags) shows consistent improvement across all queue
     -    sizes, with no regressions:
     -
     -        queue width       baseline    cascade    speedup
     -        ------------------------------------------------
     -                 10        4.32s      3.97s      1.09x
     -                100        7.95s      6.49s      1.23x
     -              1,000       11.30s      9.66s      1.17x
     -             10,000       16.34s     14.15s      1.16x
     -            100,000       21.43s     18.66s      1.15x
     -
     -    With descending keys (worst case — the last element always sinks
     -    to a leaf in both approaches) the cascade still wins slightly
     -    (1-4%) by replacing swaps with copies, and never regresses.
     -
     -    In end-to-end git commands the improvement is modest because
     -    sift_down_root is only ~8% of total runtime.  Profiling
     -    rev-list --count on a 2.5M-commit monorepo shows sift_down_root
     -    dropping from 8.2% to 0.4% of total runtime.  The improvement
     -    scales with DAG width: wider DAGs produce larger priority queues,
     -    amplifying the per-level savings.  In small or narrow repos the
     -    queues stay shallow and the effect is negligible.
     +    prio-queue: use cascade-down for faster extract-min
     +
     +    Add sift_up_rebalance(), an alternative to sift_down_root() that
     +    halves the number of comparisons per extract-min.
     +
     +    The standard extract places the last array element at the root and
     +    sifts it down.  At each level this requires two comparisons (left
     +    vs right child, then element vs winner) and a swap.
     +
     +    sift_up_rebalance() instead promotes the smaller child into the
     +    root slot at each level — one comparison and one copy — until the
     +    vacancy reaches a leaf.  The last array element is placed at the
     +    vacancy and sifted up to restore heap order.  In practice the
     +    sift-up rarely moves more than a level or two because the last
     +    array element tends to be large.
     +
     +    Work per extract drops from 2d comparisons + d swaps to
     +    d comparisons + d copies + a short sift-up.
     +
     +    prio_queue_get() now calls sift_up_rebalance() instead of placing
     +    the last element at root and calling sift_down_root().
     +
     +    sift_down_root() and prio_queue_replace() are left unchanged.
     +
     +    Synthetic benchmark (10 rounds of 10M put+get cycles, CPU-pinned,
     +    same compiler and Makefile flags):
     +
     +    Ascending keys (git's typical pattern — parents have lower
     +    priority than children):
     +
     +      queue width  baseline  patched  speedup
     +               10     4.39s    3.91s    1.12x
     +              100     9.10s    6.61s    1.38x
     +            1,000    11.84s    9.25s    1.28x
     +           10,000    17.50s   13.92s    1.26x
     +          100,000    23.97s   20.19s    1.19x
     +
     +    Descending keys (worst case — last element always sinks to leaf):
     +
     +      queue width  baseline  patched  speedup
     +               10     4.94s    4.95s    1.00x
     +              100     9.75s    9.42s    1.03x
     +            1,000    15.01s   15.29s    0.98x
     +           10,000    24.79s   23.88s    1.04x
     +          100,000    29.69s   28.24s    1.05x
     +
     +    Random keys:
     +
     +      queue width  baseline  patched  speedup
     +               10     5.05s    4.99s    1.01x
     +              100     9.90s    9.50s    1.04x
     +            1,000    15.35s   14.77s    1.04x
     +           10,000    25.35s   24.21s    1.05x
     +          100,000    65.71s   63.38s    1.04x
     +
     +    No regressions in any scenario.
     +
     +    End-to-end benchmark on the linux kernel repo (1.4M commits,
     +    range v5.0..v6.0, 311K commits, 20 interleaved runs, 1 warmup):
     +
     +      Command                      baseline  patched  speedup
     +      rev-list --count v5.0..v6.0    484ms     474ms    1.02x
     +
     +    The improvement scales with DAG width: wider DAGs produce larger
     +    priority queues, amplifying the per-level savings.  In small or
     +    narrow repositories the queues stay shallow and the sift-down
     +    cost is already negligible.
      
          Signed-off-by: Kristofer Karlsson <krka@spotify.com>
      
       ## prio-queue.c ##
      @@ prio-queue.c: static void sift_down_root(struct prio_queue *queue)
     - {
     - 	size_t ix, child;
     + 	}
     + }
       
     --	/* Push down the one at the root */
     --	for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) {
     --		child = ix * 2 + 1; /* left */
     ++static void sift_up_rebalance(struct prio_queue *queue)
     ++{
     ++	size_t ix, child;
     ++
     ++	/* Cascade: promote smaller child at each level. */
      +	for (ix = 0; (child = ix * 2 + 1) < queue->nr; ix = child) {
     - 		if (child + 1 < queue->nr &&
     - 		    compare(queue, child, child + 1) >= 0)
     - 			child++; /* use right child */
     ++		if (child + 1 < queue->nr &&
     ++		    compare(queue, child, child + 1) >= 0)
     ++			child++;
      +		queue->array[ix] = queue->array[child];
      +	}
     - 
     --		if (compare(queue, ix, child) <= 0)
     -+	/* Place queue->array[queue->nr] (left by caller) and sift up. */
     ++
     ++	/* Place the last element at the vacancy and sift up. */
      +	queue->array[ix] = queue->array[queue->nr];
      +	while (ix) {
      +		size_t parent = (ix - 1) / 2;
      +		if (compare(queue, parent, ix) <= 0)
     - 			break;
     --
     --		swap(queue, child, ix);
     ++			break;
      +		swap(queue, parent, ix);
      +		ix = parent;
     - 	}
     - }
     - 
     ++	}
     ++}
     ++
     + void *prio_queue_get(struct prio_queue *queue)
     + {
     + 	void *result;
      @@ prio-queue.c: void *prio_queue_get(struct prio_queue *queue)
       	if (!--queue->nr)
       		return result;
       
      -	queue->array[0] = queue->array[queue->nr];
     - 	sift_down_root(queue);
     +-	sift_down_root(queue);
     ++	sift_up_rebalance(queue);
       	return result;
       }
     -@@ prio-queue.c: void prio_queue_replace(struct prio_queue *queue, void *thing)
     - 		queue->array[queue->nr - 1].ctr = queue->insertion_ctr++;
     - 		queue->array[queue->nr - 1].data = thing;
     - 	} else {
     --		queue->array[0].ctr = queue->insertion_ctr++;
     --		queue->array[0].data = thing;
     --		sift_down_root(queue);
     -+		prio_queue_get(queue);
     -+		prio_queue_put(queue, thing);
     - 	}
     - }
     + 


 prio-queue.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/prio-queue.c b/prio-queue.c
index 9748528ce6..66d445b078 100644
--- a/prio-queue.c
+++ b/prio-queue.c
@@ -76,6 +76,29 @@ static void sift_down_root(struct prio_queue *queue)
 	}
 }
 
+static void sift_up_rebalance(struct prio_queue *queue)
+{
+	size_t ix, child;
+
+	/* Cascade: promote smaller child at each level. */
+	for (ix = 0; (child = ix * 2 + 1) < queue->nr; ix = child) {
+		if (child + 1 < queue->nr &&
+		    compare(queue, child, child + 1) >= 0)
+			child++;
+		queue->array[ix] = queue->array[child];
+	}
+
+	/* Place the last element at the vacancy and sift up. */
+	queue->array[ix] = queue->array[queue->nr];
+	while (ix) {
+		size_t parent = (ix - 1) / 2;
+		if (compare(queue, parent, ix) <= 0)
+			break;
+		swap(queue, parent, ix);
+		ix = parent;
+	}
+}
+
 void *prio_queue_get(struct prio_queue *queue)
 {
 	void *result;
@@ -89,8 +112,7 @@ void *prio_queue_get(struct prio_queue *queue)
 	if (!--queue->nr)
 		return result;
 
-	queue->array[0] = queue->array[queue->nr];
-	sift_down_root(queue);
+	sift_up_rebalance(queue);
 	return result;
 }
 

base-commit: 1666c1265231b0bc5f613fbbf3f0a9896cdef76e
-- 
gitgitgadget

^ permalink raw reply related

* Re: [PATCH] test-lib: fix typo in test summary message
From: Patrick Steinhardt @ 2026-06-01  8:12 UTC (permalink / raw)
  To: Amogh; +Cc: git
In-Reply-To: <20260525053633.73153-1-amoghdambal1@gmail.com>

On Sun, May 24, 2026 at 10:36:33PM -0700, Amogh wrote:
> There's a small typo ("passin", should be "passing") in the
> summary/description message for t0000-basic. Even though this isn't a
> user-facing string it should improve the developer experience + reduce
> confusion when working on the codebase.

I'd be surprised if this actually causes any confusion, so I feel like
the explanation is a bit lofty. But ultimately, one less typo is a good
thing, so the change makes sense to me.

Your patch is missing the "Signed-off-by:" trailer though.

Thanks!

Patrick

^ permalink raw reply

* Re: git-history drops signatures
From: Patrick Steinhardt @ 2026-06-01  8:03 UTC (permalink / raw)
  To: Alix Brunet; +Cc: git
In-Reply-To: <CAPCeX5a6HxD8pAcE9th8+0zhsa-nabRrJQpjKXJrg02zc6EAOw@mail.gmail.com>

Hi Alix,

On Sat, May 30, 2026 at 12:44:25PM +0200, Alix Brunet wrote:
> Hey team ;
> 
> I noticed `git history` drops signatures,
> Even though `git rebase` can keep / re-sign commits (`-S`)
> 
> Will this ever be implemented?

right now it does drop signatures indeed, but I agree that this is
something we should eventually implement. I didn't have the intent to
work on this soon though as I still have a couple of other features for
git-history(1) that I'd like to prioritize.

All to say: if anyone cares for this feature and has the capacity, then
please feel free to have a go at it :) Otherwise it'll probably take a
while.

Thanks!

Patrick

^ permalink raw reply

* [PATCH 2/2] builtin/init-db: deprecate alias for git-init(1)
From: Patrick Steinhardt @ 2026-06-01  7:56 UTC (permalink / raw)
  To: git
In-Reply-To: <20260601-pks-deprecate-git-init-db-v1-0-ea3e6eebe674@pks.im>

The git-init-db(1) command was initially only initializing the object
database of a Git repository. This has changed over time so that the
command also initializes all the other data structures, which is why we
have eventually introduced git-init(1) as a more aptly named replacement
for it.

This has all happened in 2007 already, and with 5c94f87e6b (use 'init'
instead of 'init-db' for shipped docs and tools, 2007-01-12) we have
also adapted all user-facing documentation to mention the replacement.
It is thus safe to assume that (almost) nobody uses git-init-db(1)
anymore.

Deprecate the command in favor of git-init(1) and wire up the removal
when compiling Git with breaking changes enabled.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Documentation/BreakingChanges.adoc | 3 +++
 Documentation/Makefile             | 1 +
 Documentation/git-init-db.adoc     | 5 +++++
 Documentation/meson.build          | 2 +-
 Makefile                           | 2 +-
 git.c                              | 2 ++
 t/t5502-quickfetch.sh              | 4 ++--
 t/t5503-tagfollow.sh               | 2 +-
 8 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/Documentation/BreakingChanges.adoc b/Documentation/BreakingChanges.adoc
index 73bb939359..89b7482f54 100644
--- a/Documentation/BreakingChanges.adoc
+++ b/Documentation/BreakingChanges.adoc
@@ -300,6 +300,9 @@ references.
 +
 These features will be removed.
 
+* The git-init-db(1) command is an alias for its modern drop-in replacement
+  git-init(1). The alias will be removed.
+
 * Support for "--stdin" option in the "name-rev" command was
   deprecated (and hidden from the documentation) in the Git 2.40
   timeframe, in preference to its synonym "--annotate-stdin".  Git 3.0
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2699f0b24a..3769856b58 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -541,6 +541,7 @@ lint-docs-meson:
 		sort >tmp-meson-diff/meson.adoc && \
 	ls git*.adoc scalar.adoc | \
 		grep -v -e git-bisect-lk2009.adoc \
+			-e git-init-db.adoc \
 			-e git-pack-redundant.adoc \
 			-e git-tools.adoc \
 			-e git-whatchanged.adoc \
diff --git a/Documentation/git-init-db.adoc b/Documentation/git-init-db.adoc
index 18bf1a3c8c..9802fc9f3d 100644
--- a/Documentation/git-init-db.adoc
+++ b/Documentation/git-init-db.adoc
@@ -11,6 +11,11 @@ SYNOPSIS
 [verse]
 'git init-db' [-q | --quiet] [--bare] [--template=<template-directory>] [--separate-git-dir <git-dir>] [--shared[=<permissions>]]
 
+WARNING
+-------
+
+`git init-db` has been deprecated in favor of `git init`, which is a drop-in
+replacement for `git init-db`.
 
 DESCRIPTION
 -----------
diff --git a/Documentation/meson.build b/Documentation/meson.build
index f4854f802d..0f127d752d 100644
--- a/Documentation/meson.build
+++ b/Documentation/meson.build
@@ -72,7 +72,6 @@ manpages = {
   'git-http-push.adoc' : 1,
   'git-imap-send.adoc' : 1,
   'git-index-pack.adoc' : 1,
-  'git-init-db.adoc' : 1,
   'git-init.adoc' : 1,
   'git-instaweb.adoc' : 1,
   'git-interpret-trailers.adoc' : 1,
@@ -212,6 +211,7 @@ manpages = {
 }
 
 manpages_breaking_changes = {
+  'git-init-db.adoc' : 1,
   'git-pack-redundant.adoc' : 1,
   'git-whatchanged.adoc' : 1,
 }
diff --git a/Makefile b/Makefile
index b03f74ee8c..4420231753 100644
--- a/Makefile
+++ b/Makefile
@@ -894,7 +894,6 @@ BUILT_INS += git-cherry-pick$X
 BUILT_INS += git-format-patch$X
 BUILT_INS += git-format-rev$X
 BUILT_INS += git-fsck-objects$X
-BUILT_INS += git-init-db$X
 BUILT_INS += git-maintenance$X
 BUILT_INS += git-merge-subtree$X
 BUILT_INS += git-restore$X
@@ -904,6 +903,7 @@ BUILT_INS += git-status$X
 BUILT_INS += git-switch$X
 BUILT_INS += git-version$X
 ifndef WITH_BREAKING_CHANGES
+BUILT_INS += git-init-db$X
 BUILT_INS += git-whatchanged$X
 endif
 
diff --git a/git.c b/git.c
index a72394b599..6bf6a60360 100644
--- a/git.c
+++ b/git.c
@@ -591,7 +591,9 @@ static struct cmd_struct commands[] = {
 	{ "hook", cmd_hook, RUN_SETUP_GENTLY },
 	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
 	{ "init", cmd_init },
+#ifndef WITH_BREAKING_CHANGES
 	{ "init-db", cmd_init },
+#endif
 	{ "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY },
 	{ "last-modified", cmd_last_modified, RUN_SETUP },
 	{ "log", cmd_log, RUN_SETUP },
diff --git a/t/t5502-quickfetch.sh b/t/t5502-quickfetch.sh
index b160f8b7fb..a2b62f551a 100755
--- a/t/t5502-quickfetch.sh
+++ b/t/t5502-quickfetch.sh
@@ -25,7 +25,7 @@ test_expect_success 'clone without alternate' '
 	(
 		mkdir cloned &&
 		cd cloned &&
-		git init-db &&
+		git init &&
 		git remote add -f origin ..
 	) &&
 	cnt=$( (
@@ -94,7 +94,7 @@ test_expect_success 'quickfetch should not copy from alternate' '
 	(
 		mkdir quickclone &&
 		cd quickclone &&
-		git init-db &&
+		git init &&
 		(cd ../.git/objects && pwd) >.git/objects/info/alternates &&
 		git remote add origin .. &&
 		git fetch -k -k
diff --git a/t/t5503-tagfollow.sh b/t/t5503-tagfollow.sh
index febe441041..31ec352c5c 100755
--- a/t/t5503-tagfollow.sh
+++ b/t/t5503-tagfollow.sh
@@ -32,7 +32,7 @@ test_expect_success setup '
 	(
 		mkdir cloned &&
 		cd cloned &&
-		git init-db &&
+		git init &&
 		git remote add -f origin ..
 	) &&
 

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH 1/2] builtin/init-db: rename to "builtin/init.c"
From: Patrick Steinhardt @ 2026-06-01  7:55 UTC (permalink / raw)
  To: git
In-Reply-To: <20260601-pks-deprecate-git-init-db-v1-0-ea3e6eebe674@pks.im>

Rename "builtin/init-db.c" to "builtin/init.c" to match the modern
git-init(1) command name instead of its ancient alias git-init-db(1).

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Makefile                      | 4 ++--
 builtin.h                     | 2 +-
 builtin/{init-db.c => init.c} | 8 ++++----
 git.c                         | 4 ++--
 meson.build                   | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index b31ecb0756..b03f74ee8c 100644
--- a/Makefile
+++ b/Makefile
@@ -894,7 +894,7 @@ BUILT_INS += git-cherry-pick$X
 BUILT_INS += git-format-patch$X
 BUILT_INS += git-format-rev$X
 BUILT_INS += git-fsck-objects$X
-BUILT_INS += git-init$X
+BUILT_INS += git-init-db$X
 BUILT_INS += git-maintenance$X
 BUILT_INS += git-merge-subtree$X
 BUILT_INS += git-restore$X
@@ -1428,7 +1428,7 @@ BUILTIN_OBJS += builtin/help.o
 BUILTIN_OBJS += builtin/history.o
 BUILTIN_OBJS += builtin/hook.o
 BUILTIN_OBJS += builtin/index-pack.o
-BUILTIN_OBJS += builtin/init-db.o
+BUILTIN_OBJS += builtin/init.o
 BUILTIN_OBJS += builtin/interpret-trailers.o
 BUILTIN_OBJS += builtin/last-modified.o
 BUILTIN_OBJS += builtin/log.o
diff --git a/builtin.h b/builtin.h
index 4e47a4ebd3..bd072aa0e4 100644
--- a/builtin.h
+++ b/builtin.h
@@ -200,7 +200,7 @@ int cmd_help(int argc, const char **argv, const char *prefix, struct repository
 int cmd_history(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_hook(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_index_pack(int argc, const char **argv, const char *prefix, struct repository *repo);
-int cmd_init_db(int argc, const char **argv, const char *prefix, struct repository *repo);
+int cmd_init(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_interpret_trailers(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_last_modified(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_log_reflog(int argc, const char **argv, const char *prefix, struct repository *repo);
diff --git a/builtin/init-db.c b/builtin/init.c
similarity index 98%
rename from builtin/init-db.c
rename to builtin/init.c
index c55517ad94..9184f2fc2c 100644
--- a/builtin/init-db.c
+++ b/builtin/init.c
@@ -69,10 +69,10 @@ static const char *const init_db_usage[] = {
  * On the other hand, it might just make lookup slower and messier. You
  * be the judge.  The default case is to have one DB per managed directory.
  */
-int cmd_init_db(int argc,
-		const char **argv,
-		const char *prefix,
-		struct repository *repo UNUSED)
+int cmd_init(int argc,
+	     const char **argv,
+	     const char *prefix,
+	     struct repository *repo UNUSED)
 {
 	char *git_dir;
 	const char *real_git_dir = NULL;
diff --git a/git.c b/git.c
index 36f08891ef..a72394b599 100644
--- a/git.c
+++ b/git.c
@@ -590,8 +590,8 @@ static struct cmd_struct commands[] = {
 	{ "history", cmd_history, RUN_SETUP },
 	{ "hook", cmd_hook, RUN_SETUP_GENTLY },
 	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
-	{ "init", cmd_init_db },
-	{ "init-db", cmd_init_db },
+	{ "init", cmd_init },
+	{ "init-db", cmd_init },
 	{ "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY },
 	{ "last-modified", cmd_last_modified, RUN_SETUP },
 	{ "log", cmd_log, RUN_SETUP },
diff --git a/meson.build b/meson.build
index 064fe2e2f1..682e46e7eb 100644
--- a/meson.build
+++ b/meson.build
@@ -634,7 +634,7 @@ builtin_sources = [
   'builtin/history.c',
   'builtin/hook.c',
   'builtin/index-pack.c',
-  'builtin/init-db.c',
+  'builtin/init.c',
   'builtin/interpret-trailers.c',
   'builtin/last-modified.c',
   'builtin/log.c',

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH 0/2] Deprecate git-init-db(1) alias
From: Patrick Steinhardt @ 2026-06-01  7:55 UTC (permalink / raw)
  To: git

Hi,

this small patch series deprecates the git-init-db(1) alias in favor of
git-init(1).

Patrick

---
Patrick Steinhardt (2):
      builtin/init-db: rename to "builtin/init.c"
      builtin/init-db: deprecate alias for git-init(1)

 Documentation/BreakingChanges.adoc | 3 +++
 Documentation/Makefile             | 1 +
 Documentation/git-init-db.adoc     | 5 +++++
 Documentation/meson.build          | 2 +-
 Makefile                           | 4 ++--
 builtin.h                          | 2 +-
 builtin/{init-db.c => init.c}      | 8 ++++----
 git.c                              | 6 ++++--
 meson.build                        | 2 +-
 t/t5502-quickfetch.sh              | 4 ++--
 t/t5503-tagfollow.sh               | 2 +-
 11 files changed, 25 insertions(+), 14 deletions(-)


---
base-commit: 1666c1265231b0bc5f613fbbf3f0a9896cdef76e
change-id: 20260601-pks-deprecate-git-init-db-c0e8d7f8b94e


^ permalink raw reply

* Re: [PATCH v2 0/2] commit: remove deprecated functions
From: Junio C Hamano @ 2026-06-01  7:14 UTC (permalink / raw)
  To: Jeff King
  Cc: kristofferhaugsbakk, Kristoffer Haugsbakk, Patrick Steinhardt,
	git
In-Reply-To: <20260529083716.GE1106035@coredump.intra.peff.net>

Jeff King <peff@peff.net> writes:

> On Thu, May 28, 2026 at 09:00:09AM +0200, kristofferhaugsbakk@fastmail.com wrote:
>
>> Topic summary: Remove deprecated comments that were slated for removal
>> after Git 2.53.0.
>
> This looks obviously correct to me, but the whole topic made me wonder:
> was it worth retaining the old names and deprecating them, versus just
> removing them back then?
>
> Topics in flight would have needed an update then, but they did
> eventually anyway. So it feels like the total amount of work done is
> larger, compared to just fixing them as the topics were merged. Either
> way the compiler tells us, and the adjustments themselves are small.

Your alternative approach will depend on the integrator doing all
the fixups at the merge time.

The amount of effort required by the entire community as a whole may
have been larger, but the way the rename was carried out did spread
them thinner.

Admittedly, with help from rerere and merge-fix mechanism, such a
"fixup at the merge time" typically needs to be done only once per
the other conflicting topic in flight, but still, when constructing
a workflow, I try to avoid having to depend on the single bottleneck
for a task that does not need to be performed by the single
bottleneck, especially when the single bottleneck has other tasks
that can only be done by the single bottleneck.

> Not a huge deal either way, but just pondering for future such
> situations.
>
> -Peff

^ permalink raw reply

* Re: [PATCH] sub-process: use gentle handshake to avoid die() on startup failure
From: Junio C Hamano @ 2026-06-01  6:43 UTC (permalink / raw)
  To: Michael Montalbo via GitGitGadget; +Cc: git, Michael Montalbo
In-Reply-To: <pull.2133.git.1780287309846.gitgitgadget@gmail.com>

"Michael Montalbo via GitGitGadget" <gitgitgadget@gmail.com> writes:

> diff --git a/sub-process.c b/sub-process.c
> index 83bf0a0e82..22c68bd10d 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -132,18 +132,19 @@ static int handshake_version(struct child_process *process,
>  	if (packet_flush_gently(process->in))
>  		return error("Could not write flush packet");
>  
> -	if (!(line = packet_read_line(process->out, NULL)) ||
> +	if (packet_read_line_gently(process->out, NULL, &line) <= 0 ||
>  	    !skip_prefix(line, welcome_prefix, &p) ||
>  	    strcmp(p, "-server"))
>  		return error("Unexpected line '%s', expected %s-server",
>  			     line ? line : "<flush packet>", welcome_prefix);

If `packet_read_line_gently()` returns `< 0` (due to an EOF or read
error), `line` will be `NULL`.  The error message printed will be:

    `Unexpected line '<flush packet>', expected filter-server`

This is misleading when the remote process didn't send a flush
packet; it hung up or crashed.



> -	if (!(line = packet_read_line(process->out, NULL)) ||
> +	if (packet_read_line_gently(process->out, NULL, &line) <= 0 ||
>  	    !skip_prefix(line, "version=", &p) ||
>  	    strtol_i(p, 10, chosen_version))
>  		return error("Unexpected line '%s', expected version",
>  			     line ? line : "<flush packet>");

Ditto.

> -	if ((line = packet_read_line(process->out, NULL)))
> -		return error("Unexpected line '%s', expected flush", line);
> +	if (packet_read_line_gently(process->out, NULL, &line) < 0 || line)
> +		return error("Unexpected line '%s', expected flush",
> +			     line ? line : "<read error>");

We catch error return (< 0) or a line with payload (!!line) and
report an error here, because we want to see <flush> here.  OK.


> @@ -171,7 +172,7 @@ static int handshake_capabilities(struct child_process *process,
>  	if (packet_flush_gently(process->in))
>  		return error("Could not write flush packet");
>  
> -	while ((line = packet_read_line(process->out, NULL))) {
> +	while (packet_read_line_gently(process->out, NULL, &line) > 0) {
>  		const char *p;
>  		if (!skip_prefix(line, "capability=", &p))
>  			continue;

While this correctly stops the loop if packet_read_line_gently()
returns a non-positive value, doesn't it introduce a subtle bug?

`packet_read_line_gently()` returns:

  - `> 0` for a normal line (which keeps the loop running).

  - `0` for a flush packet (which we expect as the normal terminator
    of the capabilities list, stopping the loop).

  - `< 0` for an EOF or read error (which also stops the loop).

In the original code, an EOF or read error would have caused
`packet_read_line()` to call `die()`, aborting the process.

With the new code, if the child process dies or closes its pipe
during the capabilities handshake, the loop will terminate, and the
function will return `0` (success). The parent process will proceed
as if the capabilities were successfully negotiated.  Any further
communication with the child process would fail so the damage may
not be huge, but somebody must check if the loop terminated because
of a flush packet, or an error.

	while (1) {
	        const char *p;
        	int len = packet_read_line_gently(process->out, NULL, &line);

		if (len < 0)
			return error(_("subprocess `%s` failed to give capabilities"),
				process->args.v[0]);
		if (!skip_prefix(line, "capability=", &p))
			continue;
		...

or something, perhaps?

^ permalink raw reply

* Re: [PATCH] prio-queue: use cascade-down sift for faster extract-min
From: Kristofer Karlsson @ 2026-06-01  6:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Kristofer Karlsson via GitGitGadget, git
In-Reply-To: <xmqq5x42aipu.fsf@gitster.g>

Thanks for the quick and very valid feedback! I already started
investigating - I think I was too quick (and wrong) when I reasoned
about the replace operation. I will rework it a bit and come back with
a patch version 2 soon that ensures that neither get nor replace has
regressed in any way.

- Kristofer

On Mon, 1 Jun 2026 at 08:16, Junio C Hamano <gitster@pobox.com> wrote:
>
> "Kristofer Karlsson via GitGitGadget" <gitgitgadget@gmail.com>
> writes:
>
> > diff --git a/prio-queue.c b/prio-queue.c
> > index 9748528ce6..18005c43c4 100644
> > --- a/prio-queue.c
> > +++ b/prio-queue.c
> > @@ -62,17 +62,21 @@ static void sift_down_root(struct prio_queue *queue)
> >  {
> >       size_t ix, child;
> >
> > -     /* Push down the one at the root */
> > -     for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) {
> > -             child = ix * 2 + 1; /* left */
> > +     for (ix = 0; (child = ix * 2 + 1) < queue->nr; ix = child) {
> >               if (child + 1 < queue->nr &&
> >                   compare(queue, child, child + 1) >= 0)
> >                       child++; /* use right child */
> > +             queue->array[ix] = queue->array[child];
> > +     }
> >
> > -             if (compare(queue, ix, child) <= 0)
> > +     /* Place queue->array[queue->nr] (left by caller) and sift up. */
> > +     queue->array[ix] = queue->array[queue->nr];
>
> Here we always sift/bubble up the last element.
>
> I am wondering if it makes sense to teach sift_down_root to take an
> extra argument, "struct prio_queue_entry entry" (passed by value)
> and sift/bubble it up, not always queue->array[queue->nr], and ...
>
> > +     while (ix) {
> > +             size_t parent = (ix - 1) / 2;
> > +             if (compare(queue, parent, ix) <= 0)
> >                       break;
> > -
> > -             swap(queue, child, ix);
> > +             swap(queue, parent, ix);
> > +             ix = parent;
> >       }
> >  }
> >
> > @@ -89,7 +93,6 @@ void *prio_queue_get(struct prio_queue *queue)
> >       if (!--queue->nr)
> >               return result;
> >
> > -     queue->array[0] = queue->array[queue->nr];
> >       sift_down_root(queue);
> >       return result;
> >  }
> > @@ -111,8 +114,7 @@ void prio_queue_replace(struct prio_queue *queue, void *thing)
> >               queue->array[queue->nr - 1].ctr = queue->insertion_ctr++;
> >               queue->array[queue->nr - 1].data = thing;
> >       } else {
> > -             queue->array[0].ctr = queue->insertion_ctr++;
> > -             queue->array[0].data = thing;
> > -             sift_down_root(queue);
> > +             prio_queue_get(queue);
> > +             prio_queue_put(queue, thing);
>
> ... update this part in the else clause to do something like
>
>                 struct prio_queue_entry entry;
>                 entry.ctr = queue->insertion_ctr++;
>                 entry.data = thing;
>                 sift_down_root(queue, entry);
>
> to retain the optimization?  It would perform a single cascade-down
> sift, followed by a single sift-up, so it would save a comparison, a
> > copy, and a swap in the worst case compared to the get+put sequence?
>
> Of course, the original sift_down_root() caller (i.e. prio_queue_get())
> needs to pass queue->array[queue->nr] as the second parameter to match.
>
> >       }
> >  }
> >
> > base-commit: c69baaf57ba26cf117c2b6793802877f19738b0d

^ permalink raw reply

* Re: [PATCH] prio-queue: use cascade-down sift for faster extract-min
From: Junio C Hamano @ 2026-06-01  6:16 UTC (permalink / raw)
  To: Kristofer Karlsson via GitGitGadget; +Cc: git, Kristofer Karlsson
In-Reply-To: <pull.2132.git.1780250236304.gitgitgadget@gmail.com>

"Kristofer Karlsson via GitGitGadget" <gitgitgadget@gmail.com>
writes:

> diff --git a/prio-queue.c b/prio-queue.c
> index 9748528ce6..18005c43c4 100644
> --- a/prio-queue.c
> +++ b/prio-queue.c
> @@ -62,17 +62,21 @@ static void sift_down_root(struct prio_queue *queue)
>  {
>  	size_t ix, child;
>  
> -	/* Push down the one at the root */
> -	for (ix = 0; ix * 2 + 1 < queue->nr; ix = child) {
> -		child = ix * 2 + 1; /* left */
> +	for (ix = 0; (child = ix * 2 + 1) < queue->nr; ix = child) {
>  		if (child + 1 < queue->nr &&
>  		    compare(queue, child, child + 1) >= 0)
>  			child++; /* use right child */
> +		queue->array[ix] = queue->array[child];
> +	}
>  
> -		if (compare(queue, ix, child) <= 0)
> +	/* Place queue->array[queue->nr] (left by caller) and sift up. */
> +	queue->array[ix] = queue->array[queue->nr];

Here we always sift/bubble up the last element.

I am wondering if it makes sense to teach sift_down_root to take an
extra argument, "struct prio_queue_entry entry" (passed by value)
and sift/bubble it up, not always queue->array[queue->nr], and ...

> +	while (ix) {
> +		size_t parent = (ix - 1) / 2;
> +		if (compare(queue, parent, ix) <= 0)
>  			break;
> -
> -		swap(queue, child, ix);
> +		swap(queue, parent, ix);
> +		ix = parent;
>  	}
>  }
>  
> @@ -89,7 +93,6 @@ void *prio_queue_get(struct prio_queue *queue)
>  	if (!--queue->nr)
>  		return result;
>  
> -	queue->array[0] = queue->array[queue->nr];
>  	sift_down_root(queue);
>  	return result;
>  }
> @@ -111,8 +114,7 @@ void prio_queue_replace(struct prio_queue *queue, void *thing)
>  		queue->array[queue->nr - 1].ctr = queue->insertion_ctr++;
>  		queue->array[queue->nr - 1].data = thing;
>  	} else {
> -		queue->array[0].ctr = queue->insertion_ctr++;
> -		queue->array[0].data = thing;
> -		sift_down_root(queue);
> +		prio_queue_get(queue);
> +		prio_queue_put(queue, thing);

... update this part in the else clause to do something like

		struct prio_queue_entry entry;
		entry.ctr = queue->insertion_ctr++;
		entry.data = thing;
		sift_down_root(queue, entry);

to retain the optimization?  It would perform a single cascade-down
sift, followed by a single sift-up, so it would save a comparison, a
copy, and a swap in the worst case compared to the get+put sequence?

Of course, the original sift_down_root() caller (i.e. prio_queue_get())
needs to pass queue->array[queue->nr] as the second parameter to match.

>  	}
>  }
>
> base-commit: c69baaf57ba26cf117c2b6793802877f19738b0d

^ permalink raw reply

* Mirror repositories for submodules
From: Benson Muite @ 2026-06-01  6:11 UTC (permalink / raw)
  To: git

Hi,

Would a contribution to add mirror repositories as alternate submodule
sources be considered for inclusion?  Some projects have mirror
repositories on other hosting services, and may have bandwidth limits on
their primary hosting service.  Being able to indicate mirror
repositories for where to check for updates and sources for submodules
when doing `git clone --recurse-submodules https://my.repo ` or `git
submodule update --init --recursive` would be helpful when there is a
timeout.

Regards,
Benson

^ permalink raw reply

* Re: [PATCH v2 0/2] commit: remove deprecated functions
From: Patrick Steinhardt @ 2026-06-01  5:58 UTC (permalink / raw)
  To: Jeff King; +Cc: kristofferhaugsbakk, Junio C Hamano, Kristoffer Haugsbakk, git
In-Reply-To: <20260529083716.GE1106035@coredump.intra.peff.net>

On Fri, May 29, 2026 at 04:37:16AM -0400, Jeff King wrote:
> On Thu, May 28, 2026 at 09:00:09AM +0200, kristofferhaugsbakk@fastmail.com wrote:
> 
> > Topic summary: Remove deprecated comments that were slated for removal
> > after Git 2.53.0.
> 
> This looks obviously correct to me, but the whole topic made me wonder:
> was it worth retaining the old names and deprecating them, versus just
> removing them back then?
> 
> Topics in flight would have needed an update then, but they did
> eventually anyway. So it feels like the total amount of work done is
> larger, compared to just fixing them as the topics were merged. Either
> way the compiler tells us, and the adjustments themselves are small.
> 
> Not a huge deal either way, but just pondering for future such
> situations.

Yeah, I'm always very torn on these myself. I guess ultimately it's
always going to be dependent on the actual refactoring: the bigger the
blast radius, the more it makes sense to have a transition period so
that we don't break in-flight patch series.

Where exactly that line is is a different question of course.

Patrick

^ permalink raw reply

* Re: git hook question
From: Jeff King @ 2026-06-01  5:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Wesley Schwengle, Git maillinglist
In-Reply-To: <xmqqcxyaakpy.fsf@gitster.g>

On Mon, Jun 01, 2026 at 02:33:13PM +0900, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > I don't think so; the command is expected to handle (or ignore) the
> > arguments as appropriate.
> 
> We should also caution that the command is expected to handle not
> just the arguments but its standard input.  Not reading any and
> exiting may be a no-no for some hooks.

Perhaps. I think we've tried to make Git resilient to hooks which do not
read all of their input (by ignoring SIGPIPE). It may be a bug for a
hook to ignore stdin, but depending on what the hook is trying to do,
that information might or might not be relevant.

I do think there is a gotcha for hooks that expect the stdin of their
commands to be hooked up to a terminal to interact with the user.

I certainly don't have any objection to calling more of this out in the
docs, though.

-Peff

^ permalink raw reply

* Re: git hook question
From: Junio C Hamano @ 2026-06-01  5:33 UTC (permalink / raw)
  To: Jeff King; +Cc: Wesley Schwengle, Git maillinglist
In-Reply-To: <20260529052141.GA1099450@coredump.intra.peff.net>

Jeff King <peff@peff.net> writes:

> I don't think so; the command is expected to handle (or ignore) the
> arguments as appropriate.

We should also caution that the command is expected to handle not
just the arguments but its standard input.  Not reading any and
exiting may be a no-no for some hooks.

And unlike command line arguments, there is no handy way to say "I
do not care what the input is" (other than putting "cat >/dev/null;" 
in front of what you really want to do, that is).

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox