All of lore.kernel.org
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Patrick Steinhardt <ps@pks.im>, Taylor Blau <me@ttaylorr.com>
Cc: git@vger.kernel.org
Subject: Re: [PATCH 6/9] pack-bitmap: expose function to iterate over bitmapped objects
Date: Mon, 24 Feb 2025 10:05:27 -0800	[thread overview]
Message-ID: <xmqqseo35ic8.fsf@gitster.g> (raw)
In-Reply-To: <20250221-pks-cat-file-object-type-filter-v1-6-0852530888e2@pks.im> (Patrick Steinhardt's message of "Fri, 21 Feb 2025 08:47:31 +0100")

Patrick Steinhardt <ps@pks.im> writes:

> Expose a function that allows the caller to iterate over all bitmapped
> objects of a specific type. This mechanism allows us to use the object
> type-specific bitmaps to enumerate all objects of that type without
> having to scan through a complete packfile.
>
> This functionality will be used in a subsequent commit.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  builtin/pack-objects.c |  3 ++-
>  builtin/rev-list.c     |  3 ++-
>  pack-bitmap.c          | 65 +++++++++++++++++++++++++++++++-------------------
>  pack-bitmap.h          | 12 +++++++++-
>  reachable.c            |  3 ++-
>  5 files changed, 57 insertions(+), 29 deletions(-)

After 2189649b (pack-bitmap.c: keep track of each layer's type
bitmaps, 2024-11-19) added <type>_all bitmaps to the bitmap_index
struct, this step would need some adjustment, I am afraid.

Taylor Cc'ed.

Thanks.

> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 58a9b161262..8f99e2b4fa8 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -1735,7 +1735,8 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
>  static int add_object_entry_from_bitmap(const struct object_id *oid,
>  					enum object_type type,
>  					int flags UNUSED, uint32_t name_hash,
> -					struct packed_git *pack, off_t offset)
> +					struct packed_git *pack, off_t offset,
> +					void *payload UNUSED)
>  {
>  	display_progress(progress_state, ++nr_seen);
>  
> diff --git a/builtin/rev-list.c b/builtin/rev-list.c
> index bb26bee0d45..1100dd2abe7 100644
> --- a/builtin/rev-list.c
> +++ b/builtin/rev-list.c
> @@ -429,7 +429,8 @@ static int show_object_fast(
>  	int exclude UNUSED,
>  	uint32_t name_hash UNUSED,
>  	struct packed_git *found_pack UNUSED,
> -	off_t found_offset UNUSED)
> +	off_t found_offset UNUSED,
> +	void *payload UNUSED)
>  {
>  	fprintf(stdout, "%s\n", oid_to_hex(oid));
>  	return 1;
> diff --git a/pack-bitmap.c b/pack-bitmap.c
> index 6406953d322..fc92e0aae65 100644
> --- a/pack-bitmap.c
> +++ b/pack-bitmap.c
> @@ -1509,50 +1509,45 @@ static void show_extended_objects(struct bitmap_index *bitmap_git,
>  		    (obj->type == OBJ_TAG && !revs->tag_objects))
>  			continue;
>  
> -		show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0);
> +		show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0, NULL);
>  	}
>  }
>  
> -static void init_type_iterator(struct ewah_iterator *it,
> -			       struct bitmap_index *bitmap_git,
> -			       enum object_type type)
> +static struct ewah_bitmap *ewah_for_type(struct bitmap_index *bitmap_git,
> +					 enum object_type type)
>  {
>  	switch (type) {
>  	case OBJ_COMMIT:
> -		ewah_iterator_init(it, bitmap_git->commits);
> -		break;
> -
> +		return bitmap_git->commits;
>  	case OBJ_TREE:
> -		ewah_iterator_init(it, bitmap_git->trees);
> -		break;
> -
> +		return bitmap_git->trees;
>  	case OBJ_BLOB:
> -		ewah_iterator_init(it, bitmap_git->blobs);
> -		break;
> -
> +		return bitmap_git->blobs;
>  	case OBJ_TAG:
> -		ewah_iterator_init(it, bitmap_git->tags);
> -		break;
> -
> +		return bitmap_git->tags;
>  	default:
>  		BUG("object type %d not stored by bitmap type index", type);
> -		break;
>  	}
>  }
>  
> -static void show_objects_for_type(
> -	struct bitmap_index *bitmap_git,
> -	enum object_type object_type,
> -	show_reachable_fn show_reach)
> +static void init_type_iterator(struct ewah_iterator *it,
> +			       struct bitmap_index *bitmap_git,
> +			       enum object_type type)
> +{
> +	ewah_iterator_init(it, ewah_for_type(bitmap_git, type));
> +}
> +
> +static void for_each_bitmapped_object_internal(struct bitmap_index *bitmap_git,
> +					       struct bitmap *objects,
> +					       enum object_type object_type,
> +					       show_reachable_fn show_reach,
> +					       void *payload)
>  {
>  	size_t i = 0;
>  	uint32_t offset;
> -
>  	struct ewah_iterator it;
>  	eword_t filter;
>  
> -	struct bitmap *objects = bitmap_git->result;
> -
>  	init_type_iterator(&it, bitmap_git, object_type);
>  
>  	for (i = 0; i < objects->word_alloc &&
> @@ -1595,11 +1590,31 @@ static void show_objects_for_type(
>  			if (bitmap_git->hashes)
>  				hash = get_be32(bitmap_git->hashes + index_pos);
>  
> -			show_reach(&oid, object_type, 0, hash, pack, ofs);
> +			show_reach(&oid, object_type, 0, hash, pack, ofs, payload);
>  		}
>  	}
>  }
>  
> +static void show_objects_for_type(
> +	struct bitmap_index *bitmap_git,
> +	enum object_type object_type,
> +	show_reachable_fn show_reach)
> +{
> +	for_each_bitmapped_object_internal(bitmap_git, bitmap_git->result,
> +					   object_type, show_reach, NULL);
> +}
> +
> +void for_each_bitmapped_object(struct bitmap_index *bitmap_git,
> +			       enum object_type object_type,
> +			       show_reachable_fn show_reach,
> +			       void *payload)
> +{
> +	struct bitmap *bitmap = ewah_to_bitmap(ewah_for_type(bitmap_git, object_type));
> +	for_each_bitmapped_object_internal(bitmap_git, bitmap,
> +					   object_type, show_reach, payload);
> +	bitmap_free(bitmap);
> +}
> +
>  static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
>  			     struct object_list *roots)
>  {
> diff --git a/pack-bitmap.h b/pack-bitmap.h
> index d7f4b8b8e95..3368e79ed5a 100644
> --- a/pack-bitmap.h
> +++ b/pack-bitmap.h
> @@ -50,7 +50,8 @@ typedef int (*show_reachable_fn)(
>  	int flags,
>  	uint32_t hash,
>  	struct packed_git *found_pack,
> -	off_t found_offset);
> +	off_t found_offset,
> +	void *payload);
>  
>  struct bitmap_index;
>  
> @@ -78,6 +79,15 @@ int test_bitmap_pseudo_merges(struct repository *r);
>  int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n);
>  int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n);
>  
> +/*
> + * Iterate through all bitmapped objects of the given type and execute the
> + * `show_reach` for each of them.
> + */
> + void for_each_bitmapped_object(struct bitmap_index *bitmap_git,
> +			       enum object_type object_type,
> +			       show_reachable_fn show_reach,
> +			       void *payload);
> +
>  #define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
>  	"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
>  
> diff --git a/reachable.c b/reachable.c
> index ecf7ccf5041..dd33c7f07dd 100644
> --- a/reachable.c
> +++ b/reachable.c
> @@ -337,7 +337,8 @@ static int mark_object_seen(const struct object_id *oid,
>  			     int exclude UNUSED,
>  			     uint32_t name_hash UNUSED,
>  			     struct packed_git *found_pack UNUSED,
> -			     off_t found_offset UNUSED)
> +			     off_t found_offset UNUSED,
> +			     void *payload UNUSED)
>  {
>  	struct object *obj = lookup_object_by_type(the_repository, oid, type);
>  	if (!obj)

  reply	other threads:[~2025-02-24 18:05 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-21  7:47 [PATCH 0/9] builtin/cat-file: allow filtering objects in batch mode Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 1/9] builtin/cat-file: rename variable that tracks usage Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 2/9] builtin/cat-file: wire up an option to filter objects Patrick Steinhardt
2025-02-26 15:20   ` Toon Claes
2025-02-28 10:51     ` Patrick Steinhardt
2025-02-28 17:44       ` Junio C Hamano
2025-03-03 10:40         ` Patrick Steinhardt
2025-02-27 11:20   ` Karthik Nayak
2025-02-21  7:47 ` [PATCH 3/9] builtin/cat-file: support "blob:none" objects filter Patrick Steinhardt
2025-02-26 15:22   ` Toon Claes
2025-02-27 11:26   ` Karthik Nayak
2025-02-21  7:47 ` [PATCH 4/9] builtin/cat-file: support "blob:limit=" " Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 5/9] builtin/cat-file: support "object:type=" " Patrick Steinhardt
2025-02-26 15:23   ` Toon Claes
2025-02-28 10:51     ` Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 6/9] pack-bitmap: expose function to iterate over bitmapped objects Patrick Steinhardt
2025-02-24 18:05   ` Junio C Hamano [this message]
2025-02-25  6:59     ` Patrick Steinhardt
2025-02-25 16:59       ` Junio C Hamano
2025-02-27 23:26       ` Taylor Blau
2025-02-28 10:54         ` Patrick Steinhardt
2025-02-27 23:23     ` Taylor Blau
2025-02-27 23:32       ` Junio C Hamano
2025-02-27 23:39         ` Taylor Blau
2025-02-21  7:47 ` [PATCH 7/9] pack-bitmap: introduce function to check whether a pack is bitmapped Patrick Steinhardt
2025-02-27 23:33   ` Taylor Blau
2025-02-21  7:47 ` [PATCH 8/9] builtin/cat-file: deduplicate logic to iterate over all objects Patrick Steinhardt
2025-02-21  7:47 ` [PATCH 9/9] builtin/cat-file: use bitmaps to efficiently filter by object type Patrick Steinhardt
2025-02-27 11:38   ` Karthik Nayak
2025-02-27 23:48   ` Taylor Blau
2025-03-27  9:43 ` [PATCH v2 00/10] builtin/cat-file: allow filtering objects in batch mode Patrick Steinhardt
2025-03-27  9:43   ` [PATCH v2 01/10] builtin/cat-file: rename variable that tracks usage Patrick Steinhardt
2025-04-01  9:51     ` Karthik Nayak
2025-04-02 11:13       ` Patrick Steinhardt
2025-04-07 20:25         ` Junio C Hamano
2025-03-27  9:43   ` [PATCH v2 02/10] builtin/cat-file: wire up an option to filter objects Patrick Steinhardt
2025-04-01 11:45     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-04-01 12:05     ` Karthik Nayak
2025-04-02 11:13       ` Patrick Steinhardt
2025-03-27  9:43   ` [PATCH v2 03/10] builtin/cat-file: support "blob:none" objects filter Patrick Steinhardt
2025-04-01 12:22     ` Karthik Nayak
2025-04-01 12:31       ` Karthik Nayak
2025-04-02 11:13         ` Patrick Steinhardt
2025-03-27  9:43   ` [PATCH v2 04/10] builtin/cat-file: support "blob:limit=" " Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 05/10] builtin/cat-file: support "object:type=" " Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 06/10] pack-bitmap: allow passing payloads to `show_reachable_fn()` Patrick Steinhardt
2025-04-01 12:17     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 07/10] pack-bitmap: add function to iterate over filtered bitmapped objects Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 08/10] pack-bitmap: introduce function to check whether a pack is bitmapped Patrick Steinhardt
2025-04-01 11:46     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-03-27  9:44   ` [PATCH v2 09/10] builtin/cat-file: deduplicate logic to iterate over all objects Patrick Steinhardt
2025-04-01 12:13     ` Toon Claes
2025-04-02 11:13       ` Patrick Steinhardt
2025-04-03 18:24         ` Toon Claes
2025-03-27  9:44   ` [PATCH v2 10/10] builtin/cat-file: use bitmaps to efficiently filter by object type Patrick Steinhardt
2025-04-02 11:13 ` [PATCH v3 00/11] builtin/cat-file: allow filtering objects in batch mode Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 01/11] builtin/cat-file: rename variable that tracks usage Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 02/11] builtin/cat-file: introduce function to report object status Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 03/11] builtin/cat-file: wire up an option to filter objects Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 04/11] builtin/cat-file: support "blob:none" objects filter Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 05/11] builtin/cat-file: support "blob:limit=" " Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 06/11] builtin/cat-file: support "object:type=" " Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 07/11] pack-bitmap: allow passing payloads to `show_reachable_fn()` Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 08/11] pack-bitmap: add function to iterate over filtered bitmapped objects Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 09/11] pack-bitmap: introduce function to check whether a pack is bitmapped Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 10/11] builtin/cat-file: deduplicate logic to iterate over all objects Patrick Steinhardt
2025-04-02 11:13   ` [PATCH v3 11/11] builtin/cat-file: use bitmaps to efficiently filter by object type Patrick Steinhardt
2025-04-03  8:17   ` [PATCH v3 00/11] builtin/cat-file: allow filtering objects in batch mode Karthik Nayak
2025-04-08  0:32     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqseo35ic8.fsf@gitster.g \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.