From: "John Cai via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: John Cai <johncai86@gmail.com>, John Cai <johncai86@gmail.com>
Subject: [PATCH] promisor-remote.c: use oidset for deduplication
Date: Thu, 13 Jan 2022 20:32:05 +0000 [thread overview]
Message-ID: <pull.1187.git.git.1642105926064.gitgitgadget@gmail.com> (raw)
From: John Cai <johncai86@gmail.com>
swap out oid_array for oidset in promisor-remote.c since we get
a deduplicated set of oids to operate on.
swap our calls for oid_array for oidset in callers of
promisor_remote_get_direct().
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
---
promisor-remote.c: use oidset for deduplication
swap out oid_array for oidset in promisor-remote.c since we get a
deduplicated set of oids to operate on. Any direct fetch we can save
will be well worth it.
oidset does use a slightly larger memory footprint than oid_array, so
this change is a tradeoff between memory footprint and network calls.
What I'm not sure about is if it's worth swapping out all the calls of
oid_array for oidset in callers of promisor_remote_get_direct().
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1187%2Fjohn-cai%2Fjc-dedupe-promisor-fetch-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1187/john-cai/jc-dedupe-promisor-fetch-v1
Pull-Request: https://github.com/git/git/pull/1187
builtin/index-pack.c | 9 +++---
builtin/pack-objects.c | 9 +++---
diff.c | 13 +++-----
diffcore-rename.c | 12 +++----
diffcore.h | 3 +-
merge-ort.c | 8 ++---
object-file.c | 4 ++-
promisor-remote.c | 72 ++++++++++++++----------------------------
promisor-remote.h | 6 ++--
read-cache.c | 9 +++---
10 files changed, 57 insertions(+), 88 deletions(-)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 3c2e6aee3cc..7de2eb42794 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1387,18 +1387,17 @@ static void fix_unresolved_deltas(struct hashfile *f)
/*
* Prefetch the delta bases.
*/
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
for (i = 0; i < nr_ref_deltas; i++) {
struct ref_delta_entry *d = sorted_by_pos[i];
if (!oid_object_info_extended(the_repository, &d->oid,
NULL,
OBJECT_INFO_FOR_PREFETCH))
continue;
- oid_array_append(&to_fetch, &d->oid);
+ oidset_insert(&to_fetch, &d->oid);
}
- promisor_remote_get_direct(the_repository,
- to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ promisor_remote_get_direct(the_repository, &to_fetch);
+ oidset_clear(&to_fetch);
}
for (i = 0; i < nr_ref_deltas; i++) {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ba2006f2212..4cef3bd78b2 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1894,7 +1894,7 @@ static int can_reuse_delta(const struct object_id *base_oid,
}
static void prefetch_to_pack(uint32_t object_index_start) {
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
uint32_t i;
for (i = object_index_start; i < to_pack.nr_objects; i++) {
@@ -1905,11 +1905,10 @@ static void prefetch_to_pack(uint32_t object_index_start) {
NULL,
OBJECT_INFO_FOR_PREFETCH))
continue;
- oid_array_append(&to_fetch, &entry->idx.oid);
+ oidset_insert(&to_fetch, &entry->idx.oid);
}
- promisor_remote_get_direct(the_repository,
- to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ promisor_remote_get_direct(the_repository, &to_fetch);
+ oidset_clear(&to_fetch);
}
static void check_object(struct object_entry *entry, uint32_t object_index)
diff --git a/diff.c b/diff.c
index c862771a589..e48a5c7bfcc 100644
--- a/diff.c
+++ b/diff.c
@@ -6609,14 +6609,14 @@ void diffcore_fix_diff_index(void)
}
void diff_add_if_missing(struct repository *r,
- struct oid_array *to_fetch,
+ struct oidset *to_fetch,
const struct diff_filespec *filespec)
{
if (filespec && filespec->oid_valid &&
!S_ISGITLINK(filespec->mode) &&
oid_object_info_extended(r, &filespec->oid, NULL,
OBJECT_INFO_FOR_PREFETCH))
- oid_array_append(to_fetch, &filespec->oid);
+ oidset_insert(to_fetch, &filespec->oid);
}
void diff_queued_diff_prefetch(void *repository)
@@ -6624,7 +6624,7 @@ void diff_queued_diff_prefetch(void *repository)
struct repository *repo = repository;
int i;
struct diff_queue_struct *q = &diff_queued_diff;
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
@@ -6632,12 +6632,9 @@ void diff_queued_diff_prefetch(void *repository)
diff_add_if_missing(repo, &to_fetch, p->two);
}
- /*
- * NEEDSWORK: Consider deduplicating the OIDs sent.
- */
- promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr);
+ promisor_remote_get_direct(repo, &to_fetch);
- oid_array_clear(&to_fetch);
+ oidset_clear(&to_fetch);
}
void diffcore_std(struct diff_options *options)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index bebd4ed6a42..2fe89ef01b8 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -95,7 +95,7 @@ static void inexact_prefetch(void *prefetch_options)
{
struct inexact_prefetch_options *options = prefetch_options;
int i;
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
for (i = 0; i < rename_dst_nr; i++) {
if (rename_dst[i].p->renamed_pair)
@@ -118,8 +118,8 @@ static void inexact_prefetch(void *prefetch_options)
diff_add_if_missing(options->repo, &to_fetch,
rename_src[i].p->one);
}
- promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ promisor_remote_get_direct(options->repo, &to_fetch);
+ oidset_clear(&to_fetch);
}
static int estimate_similarity(struct repository *r,
@@ -837,7 +837,7 @@ static void basename_prefetch(void *prefetch_options)
struct strintmap *dests = options->dests;
struct dir_rename_info *info = options->info;
int i;
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
/*
* TODO: The following loops mirror the code/logic from
@@ -890,8 +890,8 @@ static void basename_prefetch(void *prefetch_options)
}
}
- promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ promisor_remote_get_direct(options->repo, &to_fetch);
+ oidset_clear(&to_fetch);
}
static int find_basename_matches(struct diff_options *options,
diff --git a/diffcore.h b/diffcore.h
index badc2261c20..9f593e01165 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -11,6 +11,7 @@ struct repository;
struct strintmap;
struct strmap;
struct userdiff_driver;
+struct oidset;
/* This header file is internal between diff.c and its diff transformers
* (e.g. diffcore-rename, diffcore-pickaxe). Never include this header
@@ -229,7 +230,7 @@ int diffcore_count_changes(struct repository *r,
* repository, add that OID to to_fetch.
*/
void diff_add_if_missing(struct repository *r,
- struct oid_array *to_fetch,
+ struct oidset *to_fetch,
const struct diff_filespec *filespec);
#endif
diff --git a/merge-ort.c b/merge-ort.c
index c3197970219..107c086bb6a 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -3903,7 +3903,7 @@ static void prefetch_for_content_merges(struct merge_options *opt,
struct string_list *plist)
{
struct string_list_item *e;
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
if (opt->repo != the_repository || !has_promisor_remote())
return;
@@ -3939,12 +3939,12 @@ static void prefetch_for_content_merges(struct merge_options *opt,
S_ISREG(vi->mode) &&
oid_object_info_extended(opt->repo, &vi->oid, NULL,
OBJECT_INFO_FOR_PREFETCH))
- oid_array_append(&to_fetch, &vi->oid);
+ oidset_insert(&to_fetch, &vi->oid);
}
}
- promisor_remote_get_direct(opt->repo, to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ promisor_remote_get_direct(opt->repo, &to_fetch);
+ oidset_clear(&to_fetch);
}
static void process_entries(struct merge_options *opt,
diff --git a/object-file.c b/object-file.c
index 8be57f48de7..baec8f099de 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1519,6 +1519,7 @@ static int do_oid_object_info_extended(struct repository *r,
struct cached_object *co;
struct pack_entry e;
int rtype;
+ struct oidset to_fetch = OIDSET_INIT;
const struct object_id *real = oid;
int already_retried = 0;
@@ -1587,7 +1588,8 @@ static int do_oid_object_info_extended(struct repository *r,
* TODO Investigate checking promisor_remote_get_direct()
* TODO return value and stopping on error here.
*/
- promisor_remote_get_direct(r, real, 1);
+ oidset_insert(&to_fetch, real);
+ promisor_remote_get_direct(r, &to_fetch);
already_retried = 1;
continue;
}
diff --git a/promisor-remote.c b/promisor-remote.c
index db2ebdc66ef..00166269f60 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -12,12 +12,13 @@ struct promisor_remote_config {
static int fetch_objects(struct repository *repo,
const char *remote_name,
- const struct object_id *oids,
- int oid_nr)
+ struct oidset *oids)
{
struct child_process child = CHILD_PROCESS_INIT;
- int i;
FILE *child_in;
+ struct oidset_iter iter;
+ const struct object_id *oid;
+ oidset_iter_init(oids, &iter);
child.git_cmd = 1;
child.in = -1;
@@ -31,10 +32,10 @@ static int fetch_objects(struct repository *repo,
die(_("promisor-remote: unable to fork off fetch subprocess"));
child_in = xfdopen(child.in, "w");
- trace2_data_intmax("promisor", repo, "fetch_count", oid_nr);
+ trace2_data_intmax("promisor", repo, "fetch_count", oidset_size(oids));
- for (i = 0; i < oid_nr; i++) {
- if (fputs(oid_to_hex(&oids[i]), child_in) < 0)
+ while((oid = oidset_iter_next(&iter))) {
+ if (fputs(oid_to_hex(oid), child_in) < 0)
die_errno(_("promisor-remote: could not write to fetch subprocess"));
if (fputc('\n', child_in) < 0)
die_errno(_("promisor-remote: could not write to fetch subprocess"));
@@ -198,70 +199,43 @@ int repo_has_promisor_remote(struct repository *r)
return !!repo_promisor_remote_find(r, NULL);
}
-static int remove_fetched_oids(struct repository *repo,
- struct object_id **oids,
- int oid_nr, int to_free)
+static void remove_fetched_oids(struct repository *repo,
+ struct oidset *oids)
{
- int i, remaining_nr = 0;
- int *remaining = xcalloc(oid_nr, sizeof(*remaining));
- struct object_id *old_oids = *oids;
- struct object_id *new_oids;
-
- for (i = 0; i < oid_nr; i++)
- if (oid_object_info_extended(repo, &old_oids[i], NULL,
- OBJECT_INFO_SKIP_FETCH_OBJECT)) {
- remaining[i] = 1;
- remaining_nr++;
- }
-
- if (remaining_nr) {
- int j = 0;
- CALLOC_ARRAY(new_oids, remaining_nr);
- for (i = 0; i < oid_nr; i++)
- if (remaining[i])
- oidcpy(&new_oids[j++], &old_oids[i]);
- *oids = new_oids;
- if (to_free)
- free(old_oids);
- }
+ struct oidset_iter iter;
+ const struct object_id *oid;
- free(remaining);
+ oidset_iter_init(oids, &iter);
- return remaining_nr;
+ while((oid = oidset_iter_next(&iter)))
+ if (oid_object_info_extended(repo, oid, NULL,
+ OBJECT_INFO_SKIP_FETCH_OBJECT)){
+ oidset_remove(oids, oid);
+ }
}
int promisor_remote_get_direct(struct repository *repo,
- const struct object_id *oids,
- int oid_nr)
+ struct oidset *oids)
{
struct promisor_remote *r;
- struct object_id *remaining_oids = (struct object_id *)oids;
- int remaining_nr = oid_nr;
- int to_free = 0;
int res = -1;
- if (oid_nr == 0)
+ if (oidset_size(oids) == 0)
return 0;
promisor_remote_init(repo);
for (r = repo->promisor_remote_config->promisors; r; r = r->next) {
- if (fetch_objects(repo, r->name, remaining_oids, remaining_nr) < 0) {
- if (remaining_nr == 1)
+ if (fetch_objects(repo, r->name, oids) < 0) {
+ if (oidset_size(oids) == 1)
continue;
- remaining_nr = remove_fetched_oids(repo, &remaining_oids,
- remaining_nr, to_free);
- if (remaining_nr) {
- to_free = 1;
+ remove_fetched_oids(repo, oids);
+ if (oidset_size(oids))
continue;
- }
}
res = 0;
break;
}
- if (to_free)
- free(remaining_oids);
-
return res;
}
diff --git a/promisor-remote.h b/promisor-remote.h
index edc45ab0f5f..c90837d3592 100644
--- a/promisor-remote.h
+++ b/promisor-remote.h
@@ -3,8 +3,7 @@
#include "repository.h"
-struct object_id;
-
+struct oidset;
/*
* Promisor remote linked list
*
@@ -45,7 +44,6 @@ static inline int has_promisor_remote(void)
* If oid_nr is 0, this function returns 0 (success) immediately.
*/
int promisor_remote_get_direct(struct repository *repo,
- const struct object_id *oids,
- int oid_nr);
+ struct oidset *oids);
#endif /* PROMISOR_REMOTE_H */
diff --git a/read-cache.c b/read-cache.c
index cbe73f14e5e..b78801b1f20 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3732,7 +3732,7 @@ void prefetch_cache_entries(const struct index_state *istate,
must_prefetch_predicate must_prefetch)
{
int i;
- struct oid_array to_fetch = OID_ARRAY_INIT;
+ struct oidset to_fetch = OIDSET_INIT;
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce = istate->cache[i];
@@ -3743,9 +3743,8 @@ void prefetch_cache_entries(const struct index_state *istate,
NULL,
OBJECT_INFO_FOR_PREFETCH))
continue;
- oid_array_append(&to_fetch, &ce->oid);
+ oidset_insert(&to_fetch, &ce->oid);
}
- promisor_remote_get_direct(the_repository,
- to_fetch.oid, to_fetch.nr);
- oid_array_clear(&to_fetch);
+ promisor_remote_get_direct(the_repository, &to_fetch);
+ oidset_clear(&to_fetch);
}
base-commit: 1ffcbaa1a5f10c9f706314d77f88de20a4a498c2
--
gitgitgadget
next reply other threads:[~2022-01-13 20:32 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-13 20:32 John Cai via GitGitGadget [this message]
2022-01-13 23:45 ` [PATCH] promisor-remote.c: use oidset for deduplication Junio C Hamano
2022-01-14 12:11 ` Ævar Arnfjörð Bjarmason
2022-01-14 19:14 ` Junio C Hamano
2022-01-14 23:12 ` Junio C Hamano
2022-01-24 22:55 ` John Cai
2022-01-25 19:17 ` Jonathan Tan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=pull.1187.git.git.1642105926064.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=johncai86@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).