All of lore.kernel.org
 help / color / mirror / Atom feed
From: Toon Claes <toon@iotcl.com>
To: Patrick Steinhardt <ps@pks.im>, git@vger.kernel.org
Subject: Re: [PATCH 4/6] object-file: generalize counting objects
Date: Wed, 11 Mar 2026 14:53:20 +0100	[thread overview]
Message-ID: <87pl5albfz.fsf@iotcl.com> (raw)
In-Reply-To: <20260310-b4-pks-odb-source-count-objects-v1-4-109e07d425f4@pks.im>

Patrick Steinhardt <ps@pks.im> writes:

> Generalize the function introduced in the preceding commit to not only
> be able to approximate the number of loose objects, but to also provide
> an accurate count. The behaviour can be toggled via a new flag.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  builtin/gc.c  |  5 +++--
>  object-file.c | 58 +++++++++++++++++++++++++++++++++++++---------------------
>  object-file.h |  5 +++--
>  odb.h         |  9 +++++++++
>  4 files changed, 52 insertions(+), 25 deletions(-)
>
> diff --git a/builtin/gc.c b/builtin/gc.c
> index a08c7554cb..3a64d28da8 100644
> --- a/builtin/gc.c
> +++ b/builtin/gc.c
> @@ -474,8 +474,9 @@ static int too_many_loose_objects(int limit)
>  	int auto_threshold = DIV_ROUND_UP(limit, 256) * 256;
>  	unsigned long loose_count;
>  
> -	if (odb_source_loose_approximate_object_count(the_repository->objects->sources,
> -						      &loose_count) < 0)
> +	if (odb_source_loose_count_objects(the_repository->objects->sources,
> +					   ODB_COUNT_OBJECTS_APPROXIMATE,
> +					   &loose_count) < 0)
>  		return 0;
>  
>  	return loose_count > auto_threshold;
> diff --git a/object-file.c b/object-file.c
> index da67e3c9ff..d35cec201f 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1868,40 +1868,56 @@ int odb_source_loose_for_each_object(struct odb_source *source,
>  					     NULL, NULL, &data);
>  }
>  
> -int odb_source_loose_approximate_object_count(struct odb_source *source,
> -					      unsigned long *out)
> +static int count_loose_object(const struct object_id *oid UNUSED,
> +			      struct object_info *oi UNUSED,
> +			      void *payload)
> +{
> +	unsigned long *count = payload;
> +	(*count)++;
> +	return 0;
> +}
> +
> +int odb_source_loose_count_objects(struct odb_source *source,
> +				   enum odb_count_objects_flags flags,
> +				   unsigned long *out)
>  {
>  	const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
> -	unsigned long count = 0;
> -	struct dirent *ent;
>  	char *path = NULL;
>  	DIR *dir = NULL;
>  	int ret;
>  
> -	path = xstrfmt("%s/17", source->path);
> +	if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) {
> +		unsigned long count = 0;
> +		struct dirent *ent;
>  
> -	dir = opendir(path);
> -	if (!dir) {
> -		if (errno == ENOENT) {
> -			*out = 0;
> -			ret = 0;
> +		path = xstrfmt("%s/17", source->path);
> +
> +		dir = opendir(path);
> +		if (!dir) {
> +			if (errno == ENOENT) {
> +				*out = 0;
> +				ret = 0;
> +				goto out;
> +			}
> +
> +			ret = error_errno("cannot open object shard '%s'", path);
>  			goto out;
>  		}
>  
> -		ret = error_errno("cannot open object shard '%s'", path);
> -		goto out;
> -	}
> +		while ((ent = readdir(dir)) != NULL) {
> +			if (strspn(ent->d_name, "0123456789abcdef") != hexsz ||
> +			    ent->d_name[hexsz] != '\0')
> +				continue;
> +			count++;
> +		}
>  
> -	while ((ent = readdir(dir)) != NULL) {
> -		if (strspn(ent->d_name, "0123456789abcdef") != hexsz ||
> -		    ent->d_name[hexsz] != '\0')
> -			continue;
> -		count++;
> +		*out = count * 256;
> +		ret = 0;
> +	} else {
> +		ret = odb_source_loose_for_each_object(source, NULL, count_loose_object,
> +						       out, 0);

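Isn't `*out` uninitialized here? count_loose_object() only increments
the counter it is handed, and nothing in this branch assigns `*out`
first, so the result depends on whatever value the caller left in it.
Should we add `*out = 0;` before this call?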

>  	}
>  
> -	*out = count * 256;
> -	ret = 0;
> -
>  out:
>  	if (dir)
>  		closedir(dir);
> diff --git a/object-file.h b/object-file.h
> index b870ea9fa8..f8d8805a18 100644
> --- a/object-file.h
> +++ b/object-file.h
> @@ -149,8 +149,9 @@ int odb_source_loose_for_each_object(struct odb_source *source,
>   *
>   * Returns 0 on success, a negative error code otherwise.
>   */
> -int odb_source_loose_approximate_object_count(struct odb_source *source,
> -					      unsigned long *out);
> +int odb_source_loose_count_objects(struct odb_source *source,
> +				   enum odb_count_objects_flags flags,
> +				   unsigned long *out);
>  
>  /**
>   * format_object_header() is a thin wrapper around s xsnprintf() that
> diff --git a/odb.h b/odb.h
> index 7a583e3873..e6057477f6 100644
> --- a/odb.h
> +++ b/odb.h
> @@ -500,6 +500,15 @@ int odb_for_each_object(struct object_database *odb,
>  			void *cb_data,
>  			unsigned flags);
>  
> +enum odb_count_objects_flags {
> +	/*
> +	 * Instead of providing an accurate count, allow the number of objects
> +	 * to be approximated. Details of how this approximation works are
> +	 * subject to the specific source's implementation.
> +	 */
> +	ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
> +};
> +
>  enum {
>  	/*
>  	 * By default, `odb_write_object()` does not actually write anything
>
> -- 
> 2.53.0.880.g73c4285caa.dirty
>
>

-- 
Cheers,
Toon

  reply	other threads:[~2026-03-11 13:53 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-10 15:18 [PATCH 0/6] odb: introduce generic object counting Patrick Steinhardt
2026-03-10 15:18 ` [PATCH 1/6] odb: stop including "odb/source.h" Patrick Steinhardt
2026-03-10 15:18 ` [PATCH 2/6] packfile: extract logic to count number of objects Patrick Steinhardt
2026-03-11 12:41   ` Toon Claes
2026-03-11 13:55     ` Patrick Steinhardt
2026-03-10 15:18 ` [PATCH 3/6] object-file: extract logic to approximate object count Patrick Steinhardt
2026-03-10 17:44   ` Junio C Hamano
2026-03-11 12:47   ` Toon Claes
2026-03-11 13:58     ` Patrick Steinhardt
2026-03-10 15:18 ` [PATCH 4/6] object-file: generalize counting objects Patrick Steinhardt
2026-03-11 13:53   ` Toon Claes [this message]
2026-03-11 14:01     ` Patrick Steinhardt
2026-03-10 15:18 ` [PATCH 5/6] odb/source: introduce generic object counting Patrick Steinhardt
2026-03-10 17:51   ` Junio C Hamano
2026-03-11  6:44     ` Patrick Steinhardt
2026-03-11 15:03   ` Toon Claes
2026-03-10 15:18 ` [PATCH 6/6] odb: " Patrick Steinhardt
2026-03-11 15:30   ` Toon Claes
2026-03-12  6:57     ` Patrick Steinhardt
2026-03-12  8:42 ` [PATCH v2 0/6] " Patrick Steinhardt
2026-03-12  8:42   ` [PATCH v2 1/6] odb: stop including "odb/source.h" Patrick Steinhardt
2026-03-12  8:42   ` [PATCH v2 2/6] packfile: extract logic to count number of objects Patrick Steinhardt
2026-03-12  8:42   ` [PATCH v2 3/6] object-file: extract logic to approximate object count Patrick Steinhardt
2026-03-12  8:42   ` [PATCH v2 4/6] object-file: generalize counting objects Patrick Steinhardt
2026-03-12  8:43   ` [PATCH v2 5/6] odb/source: introduce generic object counting Patrick Steinhardt
2026-03-12  8:43   ` [PATCH v2 6/6] odb: " Patrick Steinhardt
2026-03-13 11:52   ` [PATCH v2 0/6] " Toon Claes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87pl5albfz.fsf@iotcl.com \
    --to=toon@iotcl.com \
    --cc=git@vger.kernel.org \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.