git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick Steinhardt <ps@pks.im>
To: git@vger.kernel.org
Cc: Taylor Blau <me@ttaylorr.com>, Toon Claes <toon@iotcl.com>
Subject: [PATCH v2 8/9] midx: stop duplicating info redundant with its owning source
Date: Thu, 07 Aug 2025 10:09:58 +0200	[thread overview]
Message-ID: <20250807-b4-pks-midx-deduplicate-source-info-v2-8-bcffb8fc119c@pks.im> (raw)
In-Reply-To: <20250807-b4-pks-midx-deduplicate-source-info-v2-0-bcffb8fc119c@pks.im>

Multi-pack indices store some information that is redundant with their
owning source:

  - The locality bit that tracks whether the source is the primary
    object source or an alternate.

  - The object directory path the multi-pack index is located in.

  - The pointer to the owning repository.

All of this information is already available via `struct odb_source`.
Now that this struct is always at hand when loading a multi-pack index,
there is no need to keep separate copies of it.

Drop the redundant information and instead store a pointer to the object
source.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/repack.c          |  5 +++--
 midx-write.c              |  9 +++++----
 midx.c                    | 21 +++++++++++----------
 midx.h                    |  7 ++-----
 pack-bitmap.c             | 13 +++++++------
 pack-revindex.c           | 14 +++++++-------
 t/helper/test-read-midx.c |  2 +-
 7 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/builtin/repack.c b/builtin/repack.c
index 94dec26f18..5af3e27357 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -223,9 +223,10 @@ static void mark_packs_for_deletion(struct existing_packs *existing,
 static void remove_redundant_pack(const char *dir_name, const char *base_name)
 {
 	struct strbuf buf = STRBUF_INIT;
-	struct multi_pack_index *m = get_multi_pack_index(the_repository->objects->sources);
+	struct odb_source *source = the_repository->objects->sources;
+	struct multi_pack_index *m = get_multi_pack_index(source);
 	strbuf_addf(&buf, "%s.pack", base_name);
-	if (m && m->local && midx_contains_pack(m, buf.buf))
+	if (m && source->local && midx_contains_pack(m, buf.buf))
 		clear_midx_file(the_repository);
 	strbuf_insertf(&buf, 0, "%s/", dir_name);
 	unlink_pack_path(buf.buf, 1);
diff --git a/midx-write.c b/midx-write.c
index bf7c01d4b1..84f76856d6 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -981,10 +981,11 @@ static int link_midx_to_chain(struct multi_pack_index *m)
 	for (i = 0; i < ARRAY_SIZE(midx_exts); i++) {
 		const unsigned char *hash = get_midx_checksum(m);
 
-		get_midx_filename_ext(m->repo->hash_algo, &from, m->object_dir,
+		get_midx_filename_ext(m->source->odb->repo->hash_algo, &from,
+				      m->source->path,
 				      hash, midx_exts[i].non_split);
-		get_split_midx_filename_ext(m->repo->hash_algo, &to,
-					    m->object_dir, hash,
+		get_split_midx_filename_ext(m->source->odb->repo->hash_algo, &to,
+					    m->source->path, hash,
 					    midx_exts[i].split);
 
 		if (link(from.buf, to.buf) < 0 && errno != ENOENT) {
@@ -1109,7 +1110,7 @@ static int write_midx_internal(struct odb_source *source,
 			if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) {
 				error(_("could not load reverse index for MIDX %s"),
 				      hash_to_hex_algop(get_midx_checksum(m),
-							m->repo->hash_algo));
+							m->source->odb->repo->hash_algo));
 				result = 1;
 				goto cleanup;
 			}
diff --git a/midx.c b/midx.c
index 831a7e9b5f..81bf3c4d5f 100644
--- a/midx.c
+++ b/midx.c
@@ -26,7 +26,7 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name,
 
 const unsigned char *get_midx_checksum(struct multi_pack_index *m)
 {
-	return m->data + m->data_len - m->repo->hash_algo->rawsz;
+	return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz;
 }
 
 void get_midx_filename(const struct git_hash_algo *hash_algo,
@@ -128,11 +128,10 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou
 	midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 
-	FLEX_ALLOC_STR(m, object_dir, source->path);
+	CALLOC_ARRAY(m, 1);
 	m->data = midx_map;
 	m->data_len = midx_size;
-	m->local = source->local;
-	m->repo = r;
+	m->source = source;
 
 	m->signature = get_be32(m->data);
 	if (m->signature != MIDX_SIGNATURE)
@@ -446,7 +445,7 @@ static uint32_t midx_for_pack(struct multi_pack_index **_m,
 int prepare_midx_pack(struct multi_pack_index *m,
 		      uint32_t pack_int_id)
 {
-	struct repository *r = m->repo;
+	struct repository *r = m->source->odb->repo;
 	struct strbuf pack_name = STRBUF_INIT;
 	struct strbuf key = STRBUF_INIT;
 	struct packed_git *p;
@@ -458,7 +457,7 @@ int prepare_midx_pack(struct multi_pack_index *m,
 	if (m->packs[pack_int_id])
 		return 0;
 
-	strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
+	strbuf_addf(&pack_name, "%s/pack/%s", m->source->path,
 		    m->pack_names[pack_int_id]);
 
 	/* pack_map holds the ".pack" name, but we have the .idx */
@@ -469,7 +468,8 @@ int prepare_midx_pack(struct multi_pack_index *m,
 					strhash(key.buf), key.buf,
 					struct packed_git, packmap_ent);
 	if (!p) {
-		p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
+		p = add_packed_git(r, pack_name.buf, pack_name.len,
+				   m->source->local);
 		if (p) {
 			install_packed_git(r, p);
 			list_add_tail(&p->mru, &r->objects->packed_git_mru);
@@ -528,7 +528,8 @@ int bsearch_one_midx(const struct object_id *oid, struct multi_pack_index *m,
 		     uint32_t *result)
 {
 	int ret = bsearch_hash(oid->hash, m->chunk_oid_fanout,
-			       m->chunk_oid_lookup, m->repo->hash_algo->rawsz,
+			       m->chunk_oid_lookup,
+			       m->source->odb->repo->hash_algo->rawsz,
 			       result);
 	if (result)
 		*result += m->num_objects_in_base;
@@ -559,7 +560,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid,
 	n = midx_for_object(&m, n);
 
 	oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n),
-		m->repo->hash_algo);
+		m->source->odb->repo->hash_algo);
 	return oid;
 }
 
@@ -734,7 +735,7 @@ int prepare_multi_pack_index_one(struct odb_source *source)
 
 int midx_checksum_valid(struct multi_pack_index *m)
 {
-	return hashfile_checksum_valid(m->repo->hash_algo,
+	return hashfile_checksum_valid(m->source->odb->repo->hash_algo,
 				       m->data, m->data_len);
 }
 
diff --git a/midx.h b/midx.h
index d162001fbb..71dbdec66e 100644
--- a/midx.h
+++ b/midx.h
@@ -35,6 +35,8 @@ struct odb_source;
 	"GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL"
 
 struct multi_pack_index {
+	struct odb_source *source;
+
 	const unsigned char *data;
 	size_t data_len;
 
@@ -50,7 +52,6 @@ struct multi_pack_index {
 	uint32_t num_objects;
 	int preferred_pack_idx;
 
-	int local;
 	int has_chain;
 
 	const unsigned char *chunk_pack_names;
@@ -71,10 +72,6 @@ struct multi_pack_index {
 
 	const char **pack_names;
 	struct packed_git **packs;
-
-	struct repository *repo;
-
-	char object_dir[FLEX_ARRAY];
 };
 
 #define MIDX_PROGRESS     (1 << 0)
diff --git a/pack-bitmap.c b/pack-bitmap.c
index fb0b11ca07..01e14c34bd 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -216,7 +216,7 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
 static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
 {
 	if (bitmap_is_midx(bitmap_git))
-		return bitmap_git->midx->repo;
+		return bitmap_git->midx->source->odb->repo;
 	return bitmap_git->pack->repo;
 }
 
@@ -418,13 +418,13 @@ char *midx_bitmap_filename(struct multi_pack_index *midx)
 {
 	struct strbuf buf = STRBUF_INIT;
 	if (midx->has_chain)
-		get_split_midx_filename_ext(midx->repo->hash_algo, &buf,
-					    midx->object_dir,
+		get_split_midx_filename_ext(midx->source->odb->repo->hash_algo, &buf,
+					    midx->source->path,
 					    get_midx_checksum(midx),
 					    MIDX_EXT_BITMAP);
 	else
-		get_midx_filename_ext(midx->repo->hash_algo, &buf,
-				      midx->object_dir, get_midx_checksum(midx),
+		get_midx_filename_ext(midx->source->odb->repo->hash_algo, &buf,
+				      midx->source->path, get_midx_checksum(midx),
 				      MIDX_EXT_BITMAP);
 
 	return strbuf_detach(&buf, NULL);
@@ -463,7 +463,8 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
 
 	if (bitmap_git->pack || bitmap_git->midx) {
 		struct strbuf buf = STRBUF_INIT;
-		get_midx_filename(midx->repo->hash_algo, &buf, midx->object_dir);
+		get_midx_filename(midx->source->odb->repo->hash_algo, &buf,
+				  midx->source->path);
 		trace2_data_string("bitmap", bitmap_repo(bitmap_git),
 				   "ignoring extra midx bitmap file", buf.buf);
 		close(fd);
diff --git a/pack-revindex.c b/pack-revindex.c
index 0cc422a1e6..b206518dcb 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -379,25 +379,25 @@ int load_midx_revindex(struct multi_pack_index *m)
 		 * not want to accidentally call munmap() in the middle of the
 		 * MIDX.
 		 */
-		trace2_data_string("load_midx_revindex", m->repo,
+		trace2_data_string("load_midx_revindex", m->source->odb->repo,
 				   "source", "midx");
 		m->revindex_data = (const uint32_t *)m->chunk_revindex;
 		return 0;
 	}
 
-	trace2_data_string("load_midx_revindex", m->repo,
+	trace2_data_string("load_midx_revindex", m->source->odb->repo,
 			   "source", "rev");
 
 	if (m->has_chain)
-		get_split_midx_filename_ext(m->repo->hash_algo, &revindex_name,
-					    m->object_dir, get_midx_checksum(m),
+		get_split_midx_filename_ext(m->source->odb->repo->hash_algo, &revindex_name,
+					    m->source->path, get_midx_checksum(m),
 					    MIDX_EXT_REV);
 	else
-		get_midx_filename_ext(m->repo->hash_algo, &revindex_name,
-				      m->object_dir, get_midx_checksum(m),
+		get_midx_filename_ext(m->source->odb->repo->hash_algo, &revindex_name,
+				      m->source->path, get_midx_checksum(m),
 				      MIDX_EXT_REV);
 
-	ret = load_revindex_from_disk(m->repo->hash_algo,
+	ret = load_revindex_from_disk(m->source->odb->repo->hash_algo,
 				      revindex_name.buf,
 				      m->num_objects,
 				      &m->revindex_map,
diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c
index bcb8ea7671..6de5d1665a 100644
--- a/t/helper/test-read-midx.c
+++ b/t/helper/test-read-midx.c
@@ -66,7 +66,7 @@ static int read_midx_file(const char *object_dir, const char *checksum,
 	for (i = 0; i < m->num_packs; i++)
 		printf("%s\n", m->pack_names[i]);
 
-	printf("object-dir: %s\n", m->object_dir);
+	printf("object-dir: %s\n", m->source->path);
 
 	if (show_objects) {
 		struct object_id oid;

-- 
2.51.0.rc0.215.g125493bb4a.dirty


  parent reply	other threads:[~2025-08-07  8:10 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-29 14:12 [PATCH 0/8] midx: stop deduplicating info redundant with their sources Patrick Steinhardt
2025-07-29 14:12 ` [PATCH 1/8] odb: store locality in object database sources Patrick Steinhardt
2025-08-06 16:39   ` Toon Claes
2025-08-07  6:15     ` Patrick Steinhardt
2025-08-07  8:12   ` Karthik Nayak
2025-07-29 14:12 ` [PATCH 2/8] odb: allow `odb_find_source()` to fail Patrick Steinhardt
2025-07-29 14:12 ` [PATCH 3/8] odb: return newly created in-memory sources Patrick Steinhardt
2025-08-06 16:40   ` Toon Claes
2025-08-07  6:16     ` Patrick Steinhardt
2025-08-07  8:21   ` Karthik Nayak
2025-07-29 14:12 ` [PATCH 4/8] midx: drop redundant `struct repository` parameter Patrick Steinhardt
2025-07-29 14:12 ` [PATCH 5/8] midx: load multi-pack indices via their source Patrick Steinhardt
2025-08-07  8:49   ` Karthik Nayak
2025-08-07  8:51     ` Karthik Nayak
2025-07-29 14:12 ` [PATCH 6/8] midx: write " Patrick Steinhardt
2025-08-07  8:55   ` Karthik Nayak
2025-07-29 14:12 ` [PATCH 7/8] midx: stop duplicating info redundant with its owning source Patrick Steinhardt
2025-07-29 14:12 ` [PATCH 8/8] midx: compute paths via their source Patrick Steinhardt
2025-07-29 18:33 ` [PATCH 0/8] midx: stop deduplicating info redundant with their sources Junio C Hamano
2025-07-30  5:21   ` Patrick Steinhardt
2025-08-07  8:09 ` [PATCH v2 0/9] midx: stop duplicating " Patrick Steinhardt
2025-08-07  8:09   ` [PATCH v2 1/9] odb: store locality in object database sources Patrick Steinhardt
2025-08-07 22:10     ` Taylor Blau
2025-08-07  8:09   ` [PATCH v2 2/9] odb: allow `odb_find_source()` to fail Patrick Steinhardt
2025-08-07 22:12     ` Taylor Blau
2025-08-11 11:56       ` Patrick Steinhardt
2025-08-07  8:09   ` [PATCH v2 3/9] odb: return newly created in-memory sources Patrick Steinhardt
2025-08-07 22:16     ` Taylor Blau
2025-08-11 11:56       ` Patrick Steinhardt
2025-08-07  8:09   ` [PATCH v2 4/9] odb: simplify calling `link_alt_odb_entry()` Patrick Steinhardt
2025-08-07 22:21     ` Taylor Blau
2025-08-07  8:09   ` [PATCH v2 5/9] midx: drop redundant `struct repository` parameter Patrick Steinhardt
2025-08-07  8:09   ` [PATCH v2 6/9] midx: load multi-pack indices via their source Patrick Steinhardt
2025-08-07 22:25     ` Taylor Blau
2025-08-07  8:09   ` [PATCH v2 7/9] midx: write " Patrick Steinhardt
2025-08-07 22:25     ` Taylor Blau
2025-08-07  8:09   ` Patrick Steinhardt [this message]
2025-08-07  8:09   ` [PATCH v2 9/9] midx: compute paths " Patrick Steinhardt
2025-08-07 22:27   ` [PATCH v2 0/9] midx: stop duplicating info redundant with their sources Taylor Blau
2025-08-11 11:56     ` Patrick Steinhardt
2025-08-07  8:58 ` [PATCH 0/8] midx: stop deduplicating " Karthik Nayak
2025-08-11 13:46 ` [PATCH v3 00/10] midx: stop duplicating " Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 01/10] odb: store locality in object database sources Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 02/10] odb: allow `odb_find_source()` to fail Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 03/10] odb: consistently use "dir" to refer to alternate's directory Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 04/10] odb: return newly created in-memory sources Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 05/10] odb: simplify calling `link_alt_odb_entry()` Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 06/10] midx: drop redundant `struct repository` parameter Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 07/10] midx: load multi-pack indices via their source Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 08/10] midx: write " Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 09/10] midx: stop duplicating info redundant with its owning source Patrick Steinhardt
2025-08-11 13:46   ` [PATCH v3 10/10] midx: compute paths via their source Patrick Steinhardt
2025-08-28 22:46   ` [PATCH v3 00/10] midx: stop duplicating info redundant with their sources Junio C Hamano
2025-08-29  0:34     ` Taylor Blau
2025-08-30 13:39       ` Derrick Stolee
2025-09-02  6:36         ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250807-b4-pks-midx-deduplicate-source-info-v2-8-bcffb8fc119c@pks.im \
    --to=ps@pks.im \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=toon@iotcl.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).