From: Derrick Stolee <stolee@gmail.com>
To: Lidong Yan <yldhome2d2@gmail.com>
Cc: 502024330056@smail.nju.edu.cn, git@vger.kernel.org,
gitster@pobox.com, toon@iotcl.com
Subject: [PATCH v5.1 3.5/4] revision: make helper for pathspec to bloom key
Date: Thu, 10 Jul 2025 09:51:13 -0400 [thread overview]
Message-ID: <3c59af48-23fe-4cc7-87e9-1de94f509a2b@gmail.com> (raw)
In-Reply-To: <20250710084829.2171855-5-502024330056@smail.nju.edu.cn>
On 7/10/2025 4:48 AM, Lidong Yan wrote:
> @@ -710,23 +709,26 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
> if (!revs->pruning.pathspec.nr)
> return;
>
> - revs->bloom_keyvecs_nr = 1;
> - CALLOC_ARRAY(revs->bloom_keyvecs, 1);
> - pi = &revs->pruning.pathspec.items[0];
> + revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr;
> + CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr);
> + for (int i = 0; i < revs->pruning.pathspec.nr; i++) {
> + pi = &revs->pruning.pathspec.items[i];
>
> - /* remove single trailing slash from path, if needed */
> - if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
> - path_alloc = xmemdupz(pi->match, pi->len - 1);
> - path = path_alloc;
> - } else
> - path = pi->match;
> + /* remove single trailing slash from path, if needed */
> + if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
> + path_alloc = xmemdupz(pi->match, pi->len - 1);
> + path = path_alloc;
> + } else
> + path = pi->match;
>
> - len = strlen(path);
> - if (!len)
> - goto fail;
> + len = strlen(path);
> + if (!len)
> + goto fail;
>
> - revs->bloom_keyvecs[0] =
> - bloom_keyvec_new(path, len, revs->bloom_filter_settings);
> + revs->bloom_keyvecs[i] =
> + bloom_keyvec_new(path, len, revs->bloom_filter_settings);
> + FREE_AND_NULL(path_alloc);
> + }
This diff is still bigger than I was hoping, so I'm sending a couple
of patches that simplify this code movement. Feel free to ignore
them as being too nit-picky.
--- >8 ---
From 69fa36dc615e140ae842b536f7da792beaebb272 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <stolee@gmail.com>
Date: Thu, 10 Jul 2025 08:06:29 -0400
Subject: [PATCH v5.1 3.5/4] revision: make helper for pathspec to bloom key
When preparing to use bloom filters in a revision walk, Git populates a
bloom_keyvec with an array of bloom keys for the components of a path.
Before we create the ability to map multiple pathspecs to multiple
bloom_keyvecs, extract the conversion from a pathspec to a bloom_keyvec
into its own helper method. This simplifies the state that persists in
prepare_to_use_bloom_filter() and also makes the next change much
simpler.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
revision.c | 50 +++++++++++++++++++++++++++++++-------------------
1 file changed, 31 insertions(+), 19 deletions(-)
diff --git a/revision.c b/revision.c
index 22bcfab7f93..4c09b594c55 100644
--- a/revision.c
+++ b/revision.c
@@ -687,14 +687,37 @@ static int forbid_bloom_filters(struct pathspec *spec)
static void release_revisions_bloom_keyvecs(struct rev_info *revs);
-static void prepare_to_use_bloom_filter(struct rev_info *revs)
+static int convert_pathspec_to_filter(const struct pathspec_item *pi,
+ struct bloom_keyvec **bloom_keyvec,
+ const struct bloom_filter_settings *settings)
{
- struct pathspec_item *pi;
- struct bloom_keyvec *bloom_keyvec;
- char *path_alloc = NULL;
- const char *path, *p;
size_t len;
+ const char *path;
+ char *path_alloc = NULL;
+ int res = 0;
+
+ /* remove single trailing slash from path, if needed */
+ if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
+ path_alloc = xmemdupz(pi->match, pi->len - 1);
+ path = path_alloc;
+ } else
+ path = pi->match;
+
+ len = strlen(path);
+ if (!len) {
+ res = -1;
+ goto cleanup;
+ }
+
+ *bloom_keyvec = bloom_keyvec_new(path, len, settings);
+cleanup:
+ FREE_AND_NULL(path_alloc);
+ return res;
+}
+
+static void prepare_to_use_bloom_filter(struct rev_info *revs)
+{
if (!revs->commits)
return;
@@ -712,22 +735,12 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
revs->bloom_keyvecs_nr = 1;
CALLOC_ARRAY(revs->bloom_keyvecs, 1);
- pi = &revs->pruning.pathspec.items[0];
- /* remove single trailing slash from path, if needed */
- if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
- path_alloc = xmemdupz(pi->match, pi->len - 1);
- path = path_alloc;
- } else
- path = pi->match;
-
- len = strlen(path);
- if (!len)
+ if (convert_pathspec_to_filter(&revs->pruning.pathspec.items[0],
+ &revs->bloom_keyvecs[0],
+ revs->bloom_filter_settings))
goto fail;
- revs->bloom_keyvecs[0] =
- bloom_keyvec_new(path, len, revs->bloom_filter_settings);
-
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
atexit(trace2_bloom_filter_statistics_atexit);
bloom_filter_atexit_registered = 1;
@@ -737,7 +750,6 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
fail:
revs->bloom_filter_settings = NULL;
- free(path_alloc);
release_revisions_bloom_keyvecs(revs);
}
--
2.47.2.vfs.0.2
next prev parent reply other threads:[~2025-07-10 13:51 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-25 12:55 [PATCH 0/2] bloom: use bloom filter given multiple pathspec Lidong Yan
2025-06-25 12:55 ` [PATCH 1/2] bloom: replace struct bloom_key * with struct bloom_keyvec Lidong Yan
2025-06-25 17:43 ` Junio C Hamano
2025-06-26 3:44 ` Lidong Yan
2025-06-25 12:55 ` [PATCH 2/2] bloom: enable multiple pathspec bloom keys Lidong Yan
2025-06-27 13:50 ` Junio C Hamano
2025-06-27 14:24 ` Lidong Yan
2025-06-27 18:09 ` Junio C Hamano
2025-07-01 5:52 ` Lidong Yan
2025-07-01 15:19 ` Junio C Hamano
2025-07-02 7:14 ` Lidong Yan
2025-07-02 15:48 ` Junio C Hamano
2025-07-03 1:52 ` Lidong Yan
2025-07-04 12:09 ` Lidong Yan
2025-07-01 8:50 ` SZEDER Gábor
2025-07-01 11:40 ` Lidong Yan
2025-07-01 15:43 ` Junio C Hamano
2025-06-27 20:39 ` Junio C Hamano
2025-06-28 2:54 ` Lidong Yan
2025-06-25 17:32 ` [PATCH 0/2] bloom: use bloom filter given multiple pathspec Junio C Hamano
2025-06-26 3:34 ` Lidong Yan
2025-06-26 14:15 ` Junio C Hamano
2025-06-27 6:21 ` [PATCH v2 0/2] bloom: enable bloom filter optimization for multiple pathspec elements in revision traversal Lidong Yan
2025-06-28 4:21 ` [PATCH v3 " Lidong Yan
2025-07-04 11:14 ` [PATCH v4 0/4] " Lidong Yan
2025-07-04 11:14 ` [PATCH v4 1/4] bloom: add test helper to return murmur3 hash Lidong Yan
2025-07-04 11:14 ` [PATCH v4 2/4] bloom: rename function operates on bloom_key Lidong Yan
2025-07-04 11:14 ` [PATCH v4 3/4] bloom: replace struct bloom_key * with struct bloom_keyvec Lidong Yan
2025-07-07 11:35 ` Derrick Stolee
2025-07-07 14:14 ` Lidong Yan
2025-07-04 11:14 ` [PATCH v4 4/4] bloom: optimize multiple pathspec items in revision traversal Lidong Yan
2025-07-07 11:43 ` Derrick Stolee
2025-07-07 14:18 ` Lidong Yan
2025-07-07 15:14 ` Junio C Hamano
2025-07-10 8:48 ` [PATCH v5 0/4] bloom: enable bloom filter optimization for multiple pathspec elements " Lidong Yan
2025-07-10 8:48 ` [PATCH v5 1/4] bloom: add test helper to return murmur3 hash Lidong Yan
2025-07-10 8:48 ` [PATCH v5 2/4] bloom: rename function operates on bloom_key Lidong Yan
2025-07-10 8:48 ` [PATCH v5 3/4] bloom: replace struct bloom_key * with struct bloom_keyvec Lidong Yan
2025-07-10 16:17 ` Junio C Hamano
2025-07-11 12:46 ` Lidong Yan
2025-07-11 15:06 ` Junio C Hamano
2025-07-10 8:48 ` [PATCH v5 4/4] bloom: optimize multiple pathspec items in revision traversal Lidong Yan
2025-07-10 13:51 ` Derrick Stolee [this message]
2025-07-10 15:42 ` [PATCH v5.1 3.5/4] revision: make helper for pathspec to bloom key Lidong Yan
2025-07-10 13:55 ` [PATCH v5.1 4/4] bloom: optimize multiple pathspec items in revision Derrick Stolee
2025-07-10 15:49 ` Lidong Yan
2025-07-10 13:49 ` [PATCH v5 0/4] bloom: enable bloom filter optimization for multiple pathspec elements in revision traversal Derrick Stolee
2025-07-12 9:35 ` [PATCH v6 0/5] " Lidong Yan
2025-07-12 9:35 ` [PATCH v6 1/5] bloom: add test helper to return murmur3 hash Lidong Yan
2025-07-12 9:35 ` [PATCH v6 2/5] bloom: rename function operates on bloom_key Lidong Yan
2025-07-12 9:35 ` [PATCH v6 3/5] bloom: replace struct bloom_key * with struct bloom_keyvec Lidong Yan
2025-07-12 9:35 ` [PATCH v6 4/5] revision: make helper for pathspec to bloom keyvec Lidong Yan
2025-07-12 9:35 ` [PATCH v6 5/5] To enable optimize multiple pathspec items in revision traversal, return 0 if all pathspec item is literal in forbid_bloom_filters(). Add for loops to initialize and check each pathspec item's bloom_keyvec when optimization is possible Lidong Yan
2025-07-12 9:47 ` Lidong Yan
2025-07-12 9:51 ` [PATCH v6 5/5] bloom: optimize multiple pathspec items in revision Lidong Yan
2025-07-14 16:51 ` Derrick Stolee
2025-07-14 17:01 ` Junio C Hamano
2025-07-15 1:37 ` Lidong Yan
2025-07-15 2:56 ` [RESEND][PATCH " Lidong Yan
2025-07-14 16:53 ` [PATCH v6 0/5] bloom: enable bloom filter optimization for multiple pathspec elements in revision traversal Derrick Stolee
2025-07-14 17:02 ` Junio C Hamano
2025-07-15 1:34 ` Lidong Yan
2025-07-15 2:48 ` Derrick Stolee
2025-07-15 15:09 ` Junio C Hamano
2025-06-28 4:21 ` [PATCH v3 1/2] bloom: replace struct bloom_key * with struct bloom_keyvec Lidong Yan
2025-07-02 15:08 ` Patrick Steinhardt
2025-07-02 15:49 ` Lidong Yan
2025-07-02 18:28 ` Junio C Hamano
2025-07-03 1:41 ` Lidong Yan
2025-06-28 4:21 ` [PATCH v3 2/2] bloom: optimize multiple pathspec items in revision traversal Lidong Yan
2025-06-27 6:21 ` [PATCH v2 1/2] bloom: replace struct bloom_key * with struct bloom_keyvec Lidong Yan
2025-06-27 6:21 ` [PATCH v2 2/2] bloom: optimize multiple pathspec items in revision traversal Lidong Yan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3c59af48-23fe-4cc7-87e9-1de94f509a2b@gmail.com \
--to=stolee@gmail.com \
--cc=502024330056@smail.nju.edu.cn \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=toon@iotcl.com \
--cc=yldhome2d2@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).