git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Patrick Steinhardt <ps@pks.im>, Taylor Blau <me@ttaylorr.com>
Cc: git@vger.kernel.org
Subject: Re: [PATCH 6/9] pack-bitmap: expose function to iterate over bitmapped objects
Date: Mon, 24 Feb 2025 10:05:27 -0800	[thread overview]
Message-ID: <xmqqseo35ic8.fsf@gitster.g> (raw)
In-Reply-To: <20250221-pks-cat-file-object-type-filter-v1-6-0852530888e2@pks.im> (Patrick Steinhardt's message of "Fri, 21 Feb 2025 08:47:31 +0100")

Patrick Steinhardt <ps@pks.im> writes:

> Expose a function that allows the caller to iterate over all bitmapped
> objects of a specific type. This mechanism allows us to use the object
> type-specific bitmaps to enumerate all objects of that type without
> having to scan through a complete packfile.
>
> This functionality will be used in a subsequent commit.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  builtin/pack-objects.c |  3 ++-
>  builtin/rev-list.c     |  3 ++-
>  pack-bitmap.c          | 65 +++++++++++++++++++++++++++++++-------------------
>  pack-bitmap.h          | 12 +++++++++-
>  reachable.c            |  3 ++-
>  5 files changed, 57 insertions(+), 29 deletions(-)

Now that 2189649b (pack-bitmap.c: keep track of each layer's type
bitmaps, 2024-11-19) has added <type>_all bitmaps to the bitmap_index
struct, this step will need some adjustment, I am afraid.

Taylor Cc'ed.

Thanks.

> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 58a9b161262..8f99e2b4fa8 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -1735,7 +1735,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
>  static int add_object_entry_from_bitmap(const struct object_id *oid,
>  					enum object_type type,
>  					int flags UNUSED, uint32_t name_hash,
> -					struct packed_git *pack, off_t offset)
> +					struct packed_git *pack, off_t offset,
> +					void *payload UNUSED)
>  {
>  	display_progress(progress_state, ++nr_seen);
>  
> diff --git a/builtin/rev-list.c b/builtin/rev-list.c
> index bb26bee0d45..1100dd2abe7 100644
> --- a/builtin/rev-list.c
> +++ b/builtin/rev-list.c
> @@ -429,7 +429,8 @@ static int show_object_fast(
>  	int exclude UNUSED,
>  	uint32_t name_hash UNUSED,
>  	struct packed_git *found_pack UNUSED,
> -	off_t found_offset UNUSED)
> +	off_t found_offset UNUSED,
> +	void *payload UNUSED)
>  {
>  	fprintf(stdout, "%s\n", oid_to_hex(oid));
>  	return 1;
> diff --git a/pack-bitmap.c b/pack-bitmap.c
> index 6406953d322..fc92e0aae65 100644
> --- a/pack-bitmap.c
> +++ b/pack-bitmap.c
> @@ -1509,50 +1509,45 @@ static void show_extended_objects(struct bitmap_index *bitmap_git,
>  		    (obj->type == OBJ_TAG && !revs->tag_objects))
>  			continue;
>  
> -		show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0);
> +		show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL);
>  	}
>  }
>  
> -static void init_type_iterator(struct ewah_iterator *it,
> -			       struct bitmap_index *bitmap_git,
> -			       enum object_type type)
> +static struct ewah_bitmap *ewah_for_type(struct bitmap_index *bitmap_git,
> +					 enum object_type type)
>  {
>  	switch (type) {
>  	case OBJ_COMMIT:
> -		ewah_iterator_init(it, bitmap_git->commits);
> -		break;
> -
> +		return bitmap_git->commits;
>  	case OBJ_TREE:
> -		ewah_iterator_init(it, bitmap_git->trees);
> -		break;
> -
> +		return bitmap_git->trees;
>  	case OBJ_BLOB:
> -		ewah_iterator_init(it, bitmap_git->blobs);
> -		break;
> -
> +		return bitmap_git->blobs;
>  	case OBJ_TAG:
> -		ewah_iterator_init(it, bitmap_git->tags);
> -		break;
> -
> +		return bitmap_git->tags;
>  	default:
>  		BUG("object type %d not stored by bitmap type index", type);
> -		break;
>  	}
>  }
>  
> -static void show_objects_for_type(
> -	struct bitmap_index *bitmap_git,
> -	enum object_type object_type,
> -	show_reachable_fn show_reach)
> +static void init_type_iterator(struct ewah_iterator *it,
> +			       struct bitmap_index *bitmap_git,
> +			       enum object_type type)
> +{
> +	ewah_iterator_init(it, ewah_for_type(bitmap_git, type));
> +}
> +
> +static void for_each_bitmapped_object_internal(struct bitmap_index *bitmap_git,
> +					       struct bitmap *objects,
> +					       enum object_type object_type,
> +					       show_reachable_fn show_reach,
> +					       void *payload)
>  {
>  	size_t i = 0;
>  	uint32_t offset;
> -
>  	struct ewah_iterator it;
>  	eword_t filter;
>  
> -	struct bitmap *objects = bitmap_git->result;
> -
>  	init_type_iterator(&it, bitmap_git, object_type);
>  
>  	for (i = 0; i < objects->word_alloc &&
> @@ -1595,11 +1590,31 @@ static void show_objects_for_type(
>  			if (bitmap_git->hashes)
>  				hash = get_be32(bitmap_git->hashes + index_pos);
>  
> -			show_reach(&oid, object_type, 0, hash, pack, ofs);
> +			show_reach(&oid, object_type, 0, hash, pack, ofs, payload);
>  		}
>  	}
>  }
>  
> +static void show_objects_for_type(
> +	struct bitmap_index *bitmap_git,
> +	enum object_type object_type,
> +	show_reachable_fn show_reach)
> +{
> +	for_each_bitmapped_object_internal(bitmap_git, bitmap_git->result,
> +					   object_type, show_reach, NULL);
> +}
> +
> +void for_each_bitmapped_object(struct bitmap_index *bitmap_git,
> +			       enum object_type object_type,
> +			       show_reachable_fn show_reach,
> +			       void *payload)
> +{
> +	struct bitmap *bitmap = ewah_to_bitmap(ewah_for_type(bitmap_git, object_type));
> +	for_each_bitmapped_object_internal(bitmap_git, bitmap,
> +					   object_type, show_reach, payload);
> +	bitmap_free(bitmap);
> +}
> +
>  static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
>  			     struct object_list *roots)
>  {
> diff --git a/pack-bitmap.h b/pack-bitmap.h
> index d7f4b8b8e95..3368e79ed5a 100644
> --- a/pack-bitmap.h
> +++ b/pack-bitmap.h
> @@ -50,7 +50,8 @@ typedef int (*show_reachable_fn)(
>  	int flags,
>  	uint32_t hash,
>  	struct packed_git *found_pack,
> -	off_t found_offset);
> +	off_t found_offset,
> +	void *payload);
>  
>  struct bitmap_index;
>  
> @@ -78,6 +79,15 @@ int test_bitmap_pseudo_merges(struct repository *r);
>  int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n);
>  int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n);
>  
> +/*
> + * Iterate through all bitmapped objects of the given type and execute the
> + * `show_reach` for each of them.
> + */
> + void for_each_bitmapped_object(struct bitmap_index *bitmap_git,
> +			       enum object_type object_type,
> +			       show_reachable_fn show_reach,
> +			       void *payload);
> +
>  #define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
>  	"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
>  
> diff --git a/reachable.c b/reachable.c
> index ecf7ccf5041..dd33c7f07dd 100644
> --- a/reachable.c
> +++ b/reachable.c
> @@ -337,7 +337,8 @@ static int mark_object_seen(const struct object_id *oid,
>  			     int exclude UNUSED,
>  			     uint32_t name_hash UNUSED,
>  			     struct packed_git *found_pack UNUSED,
> -			     off_t found_offset UNUSED)
> +			     off_t found_offset UNUSED,
> +			     void *payload UNUSED)
>  {
>  	struct object *obj = lookup_object_by_type(the_repository, oid, type);
>  	if (!obj)

  reply	other threads:[~2025-02-24 18:05 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-21  7:47 [PATCH 0/9] builtin/cat-file: allow filtering objects in batch mode Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 1/9] builtin/cat-file: rename variable that tracks usage Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 2/9] builtin/cat-file: wire up an option to filter objects Patrick Steinhardt
2025-02-26 15:20   ` Toon Claes
2025-02-28 10:51     ` Patrick Steinhardt
2025-02-28 17:44       ` Junio C Hamano
2025-03-03 10:40         ` Patrick Steinhardt
2025-02-27 11:20   ` Karthik Nayak
2025-02-21  7:47 ` [PATCH 3/9] builtin/cat-file: support "blob:none" objects filter Patrick Steinhardt
2025-02-26 15:22   ` Toon Claes
2025-02-27 11:26   ` Karthik Nayak
2025-02-21  7:47 ` [PATCH 4/9] builtin/cat-file: support "blob:limit=" " Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 5/9] builtin/cat-file: support "object:type=" " Patrick Steinhardt
2025-02-26 15:23   ` Toon Claes
2025-02-28 10:51     ` Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 6/9] pack-bitmap: expose function to iterate over bitmapped objects Patrick Steinhardt
2025-02-24 18:05   ` Junio C Hamano [this message]
2025-02-25  6:59     ` Patrick Steinhardt
2025-02-25 16:59       ` Junio C Hamano
2025-02-27 23:26       ` Taylor Blau
2025-02-28 10:54         ` Patrick Steinhardt
2025-02-27 23:23     ` Taylor Blau
2025-02-27 23:32       ` Junio C Hamano
2025-02-27 23:39         ` Taylor Blau
2025-02-21  7:47 ` [PATCH 7/9] pack-bitmap: introduce function to check whether a pack is bitmapped Patrick Steinhardt
2025-02-27 23:33   ` Taylor Blau
2025-02-21  7:47 ` [PATCH 8/9] builtin/cat-file: deduplicate logic to iterate over all objects Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 9/9] builtin/cat-file: use bitmaps to efficiently filter by object type Patrick Steinhardt
2025-02-27 11:38   ` Karthik Nayak
2025-02-27 23:48   ` Taylor Blau
2025-03-27  9:43 ` [PATCH v2 00/10] builtin/cat-file: allow filtering objects in batch mode Patrick Steinhardt
2025-03-27  9:43   ` [PATCH v2 01/10] builtin/cat-file: rename variable that tracks usage Patrick Steinhardt
2025-04-01  9:51     ` Karthik Nayak
2025-04-02 11:13       ` Patrick Steinhardt
2025-04-07 20:25         ` Junio C Hamano
2025-03-27  9:43   ` [PATCH v2 02/10] builtin/cat-file: wire up an option to filter objects Patrick Steinhardt
2025-04-01 11:45     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-04-01 12:05     ` Karthik Nayak
2025-04-02 11:13       ` Patrick Steinhardt
2025-03-27  9:43   ` [PATCH v2 03/10] builtin/cat-file: support "blob:none" objects filter Patrick Steinhardt
2025-04-01 12:22     ` Karthik Nayak
2025-04-01 12:31       ` Karthik Nayak
2025-04-02 11:13         ` Patrick Steinhardt
2025-03-27  9:43   ` [PATCH v2 04/10] builtin/cat-file: support "blob:limit=" " Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 05/10] builtin/cat-file: support "object:type=" " Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 06/10] pack-bitmap: allow passing payloads to `show_reachable_fn()` Patrick Steinhardt
2025-04-01 12:17     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 07/10] pack-bitmap: add function to iterate over filtered bitmapped objects Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 08/10] pack-bitmap: introduce function to check whether a pack is bitmapped Patrick Steinhardt
2025-04-01 11:46     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 09/10] builtin/cat-file: deduplicate logic to iterate over all objects Patrick Steinhardt
2025-04-01 12:13     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-04-03 18:24         ` Toon Claes
2025-03-27  9:44   ` [PATCH v2 10/10] builtin/cat-file: use bitmaps to efficiently filter by object type Patrick Steinhardt
2025-04-02 11:13 ` [PATCH v3 00/11] builtin/cat-file: allow filtering objects in batch mode Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 01/11] builtin/cat-file: rename variable that tracks usage Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 02/11] builtin/cat-file: introduce function to report object status Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 03/11] builtin/cat-file: wire up an option to filter objects Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 04/11] builtin/cat-file: support "blob:none" objects filter Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 05/11] builtin/cat-file: support "blob:limit=" " Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 06/11] builtin/cat-file: support "object:type=" " Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 07/11] pack-bitmap: allow passing payloads to `show_reachable_fn()` Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 08/11] pack-bitmap: add function to iterate over filtered bitmapped objects Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 09/11] pack-bitmap: introduce function to check whether a pack is bitmapped Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 10/11] builtin/cat-file: deduplicate logic to iterate over all objects Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 11/11] builtin/cat-file: use bitmaps to efficiently filter by object type Patrick Steinhardt
2025-04-03  8:17   ` [PATCH v3 00/11] builtin/cat-file: allow filtering objects in batch mode Karthik Nayak
2025-04-08  0:32     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqseo35ic8.fsf@gitster.g \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).