* [PATCH 01/20] packfile: pass down repository to `odb_pack_name`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 21:06 ` Taylor Blau
2024-10-21 9:57 ` [PATCH 02/20] packfile: pass down repository to `unuse_one_window` Karthik Nayak
` (28 subsequent siblings)
29 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers. Let's remove its usage from this function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 15 +++++++++------
packfile.c | 27 ++++++++++++++-------------
packfile.h | 13 ++++++++-----
6 files changed, 38 insertions(+), 31 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 1e7ab67f6e..7ad950627c 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -805,7 +805,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(the_repository, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -813,11 +813,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(the_repository, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(the_repository, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -831,7 +831,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(the_repository, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..97afc69625 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 5809613002..60f806e672 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -688,7 +688,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- sha1_pack_index_name(pl->pack->hash),
+ sha1_pack_index_name(the_repository, pl->pack->hash),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index d59e59f66b..309669c203 100644
--- a/http.c
+++ b/http.c
@@ -2388,7 +2388,7 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url)
strbuf_addf(&buf, "objects/pack/pack-%s.idx", hash_to_hex(hash));
url = strbuf_detach(&buf, NULL);
- strbuf_addf(&buf, "%s.temp", sha1_pack_index_name(hash));
+ strbuf_addf(&buf, "%s.temp", sha1_pack_index_name(the_repository, hash));
tmp = strbuf_detach(&buf, NULL);
if (http_get_file(url, tmp, NULL) != HTTP_OK) {
@@ -2407,8 +2407,10 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
char *tmp_idx = NULL;
int ret;
- if (has_pack_index(sha1)) {
- new_pack = parse_pack_index(sha1, sha1_pack_index_name(sha1));
+ if (has_pack_index(the_repository, sha1)) {
+ new_pack = parse_pack_index(the_repository, sha1,
+ sha1_pack_index_name(the_repository,
+ sha1));
if (!new_pack)
return -1; /* parse_pack_index() already issued error message */
goto add_pack;
@@ -2418,7 +2420,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
@@ -2429,7 +2431,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
ret = verify_pack_index(new_pack);
if (!ret) {
close_pack_index(new_pack);
- ret = finalize_object_file(tmp_idx, sha1_pack_index_name(sha1));
+ ret = finalize_object_file(tmp_idx, sha1_pack_index_name(the_repository, sha1));
}
free(tmp_idx);
if (ret)
@@ -2563,7 +2565,8 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash));
+ strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(the_repository,
+ packed_git_hash));
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
error("Unable to open local file %s for pack",
diff --git a/packfile.c b/packfile.c
index df4ba67719..e4569ea29d 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,26 +25,25 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *repo, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(repo),
hash_to_hex(hash), ext);
return buf->buf;
}
-char *sha1_pack_name(const unsigned char *sha1)
+char *sha1_pack_name(struct repository *repo, const unsigned char *sha1)
{
static struct strbuf buf = STRBUF_INIT;
- return odb_pack_name(&buf, sha1, "pack");
+ return odb_pack_name(repo, &buf, sha1, "pack");
}
-char *sha1_pack_index_name(const unsigned char *sha1)
+char *sha1_pack_index_name(struct repository *repo, const unsigned char *sha1)
{
static struct strbuf buf = STRBUF_INIT;
- return odb_pack_name(&buf, sha1, "idx");
+ return odb_pack_name(repo, &buf, sha1, "idx");
}
static unsigned int pack_used_ctr;
@@ -237,14 +236,16 @@ static struct packed_git *alloc_packed_git(int extra)
return p;
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *repo,
+ unsigned char *sha1,
+ const char *idx_path)
{
- const char *path = sha1_pack_name(sha1);
+ const char *path = sha1_pack_name(repo, sha1);
size_t alloc = st_add(strlen(path), 1);
struct packed_git *p = alloc_packed_git(alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -2151,10 +2152,10 @@ int has_object_kept_pack(const struct object_id *oid, unsigned flags)
return find_kept_pack_entry(the_repository, oid, flags, &e);
}
-int has_pack_index(const unsigned char *sha1)
+int has_pack_index(struct repository *repo, const unsigned char *sha1)
{
struct stat st;
- if (stat(sha1_pack_index_name(sha1), &st))
+ if (stat(sha1_pack_index_name(repo, sha1), &st))
return 0;
return 1;
}
diff --git a/packfile.h b/packfile.h
index 0f78658229..507ac602b5 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,21 +29,22 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *repo, struct strbuf *buf,
+ const unsigned char *sha1, const char *ext);
/*
* Return the name of the (local) packfile with the specified sha1 in
* its name. The return value is a pointer to memory that is
* overwritten each time this function is called.
*/
-char *sha1_pack_name(const unsigned char *sha1);
+char *sha1_pack_name(struct repository *repo, const unsigned char *sha1);
/*
* Return the name of the (local) pack index file with the specified
* sha1 in its name. The return value is a pointer to memory that is
* overwritten each time this function is called.
*/
-char *sha1_pack_index_name(const unsigned char *sha1);
+char *sha1_pack_index_name(struct repository *repo, const unsigned char *sha1);
/*
* Return the basename of the packfile, omitting any containing directory
@@ -51,7 +52,9 @@ char *sha1_pack_index_name(const unsigned char *sha1);
*/
const char *pack_basename(struct packed_git *p);
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *repo,
+ unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -193,7 +196,7 @@ int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsi
int has_object_pack(const struct object_id *oid);
int has_object_kept_pack(const struct object_id *oid, unsigned flags);
-int has_pack_index(const unsigned char *sha1);
+int has_pack_index(struct repository *repo, const unsigned char *sha1);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH 01/20] packfile: pass down repository to `odb_pack_name`
2024-10-21 9:57 ` [PATCH 01/20] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-10-21 21:06 ` Taylor Blau
2024-10-22 8:51 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-21 21:06 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 21, 2024 at 11:57:44AM +0200, Karthik Nayak wrote:
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index 5809613002..60f806e672 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c
> @@ -688,7 +688,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
> pl = red = pack_list_difference(local_packs, min);
> while (pl) {
> printf("%s\n%s\n",
> - sha1_pack_index_name(pl->pack->hash),
> + sha1_pack_index_name(the_repository, pl->pack->hash),
> pl->pack->pack_name);
> pl = pl->next;
> }
I am a little surprised to see sha1_pack_index_name() converted
similarly here, as this patch promises only to touch the
'odb_pack_name()' function.
> diff --git a/packfile.h b/packfile.h
> index 0f78658229..507ac602b5 100644
> --- a/packfile.h
> +++ b/packfile.h
Indeed, it looks like odb_pack_name(), sha1_pack_name(),
sha1_pack_index_name(), parse_pack_index(), and has_pack_index() are all
modified. Were these meant to go in separate patches?
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 01/20] packfile: pass down repository to `odb_pack_name`
2024-10-21 21:06 ` Taylor Blau
@ 2024-10-22 8:51 ` karthik nayak
2024-10-22 16:37 ` Taylor Blau
0 siblings, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-10-22 8:51 UTC (permalink / raw)
To: Taylor Blau; +Cc: git
[-- Attachment #1: Type: text/plain, Size: 1616 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Oct 21, 2024 at 11:57:44AM +0200, Karthik Nayak wrote:
>> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
>> index 5809613002..60f806e672 100644
>> --- a/builtin/pack-redundant.c
>> +++ b/builtin/pack-redundant.c
>> @@ -688,7 +688,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
>> pl = red = pack_list_difference(local_packs, min);
>> while (pl) {
>> printf("%s\n%s\n",
>> - sha1_pack_index_name(pl->pack->hash),
>> + sha1_pack_index_name(the_repository, pl->pack->hash),
>> pl->pack->pack_name);
>> pl = pl->next;
>> }
>
> I am a little surprised to see sha1_pack_index_name() converted
> similarly here, as this patch promises only to touch the
> 'odb_pack_name()' function.
>
>> diff --git a/packfile.h b/packfile.h
>> index 0f78658229..507ac602b5 100644
>> --- a/packfile.h
>> +++ b/packfile.h
>
> Indeed, it looks like odb_pack_name(), sha1_pack_name(),
> sha1_pack_index_name(), parse_pack_index(), and has_pack_index() are all
> modified. Were these meant to go in separate patches?
>
Nope this is intentional, each commit tries to pick a base function and
modifies all layers above it (I should have explicitly specified my
approach in the cover).
In this commit, we try to modify `odb_pack_name()` and therefore
sha1_pack_name(), sha1_pack_index_name(), parse_pack_index(), and
has_pack_index() too. Otherwise, we'd have a lot lot more commits.
This simplifies the review too, but yes, will add more instructions to the
next version.
> Thanks,
> Taylor
Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 01/20] packfile: pass down repository to `odb_pack_name`
2024-10-22 8:51 ` karthik nayak
@ 2024-10-22 16:37 ` Taylor Blau
0 siblings, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-22 16:37 UTC (permalink / raw)
To: karthik nayak; +Cc: git
On Tue, Oct 22, 2024 at 04:51:04AM -0400, karthik nayak wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > On Mon, Oct 21, 2024 at 11:57:44AM +0200, Karthik Nayak wrote:
> >> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> >> index 5809613002..60f806e672 100644
> >> --- a/builtin/pack-redundant.c
> >> +++ b/builtin/pack-redundant.c
> >> @@ -688,7 +688,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
> >> pl = red = pack_list_difference(local_packs, min);
> >> while (pl) {
> >> printf("%s\n%s\n",
> >> - sha1_pack_index_name(pl->pack->hash),
> >> + sha1_pack_index_name(the_repository, pl->pack->hash),
> >> pl->pack->pack_name);
> >> pl = pl->next;
> >> }
> >
> > I am a little surprised to see sha1_pack_index_name() converted
> > similarly here, as this patch promises only to touch the
> > 'odb_pack_name()' function.
> >
> >> diff --git a/packfile.h b/packfile.h
> >> index 0f78658229..507ac602b5 100644
> >> --- a/packfile.h
> >> +++ b/packfile.h
> >
> > Indeed, it looks like odb_pack_name(), sha1_pack_name(),
> > sha1_pack_index_name(), parse_pack_index(), and has_pack_index() are all
> > modified. Were these meant to go in separate patches?
> >
>
> Nope this is intentional, each commit tries to pick a base function and
> modifies all layers above it (I should have explicitly specified my
> approach in the cover).
>
> In this commit, we try to modify `odb_pack_name()` and therefore
> sha1_pack_name(), sha1_pack_index_name(), parse_pack_index(), and
> has_pack_index() too. Otherwise, we'd have a lot lot more commits.
>
> This simplifies the review too, but yes, will add more instructions to the
> next version.
Makes sense, thanks for clarifying. I agree that this would be good
information to have in the patch message to avoid confusion.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH 02/20] packfile: pass down repository to `unuse_one_window`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-10-21 9:57 ` [PATCH 01/20] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 21:08 ` Taylor Blau
2024-10-21 9:57 ` [PATCH 03/20] packfile: pass down repository to `close_one_pack` Karthik Nayak
` (27 subsequent siblings)
29 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `unuse_one_window` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers. Let's remove its usage from this function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/pack-objects.c | 12 ++++++------
pack-check.c | 6 +++---
packfile.c | 25 +++++++++++++------------
packfile.h | 3 ++-
streaming.c | 2 +-
5 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0fc0680b40..4dd6ada184 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -401,7 +401,7 @@ static int check_pack_inflate(struct packed_git *p,
memset(&stream, 0, sizeof(stream));
git_inflate_init(&stream);
do {
- in = use_pack(p, w_curs, offset, &stream.avail_in);
+ in = use_pack(the_repository, p, w_curs, offset, &stream.avail_in);
stream.next_in = in;
stream.next_out = fakebuf;
stream.avail_out = sizeof(fakebuf);
@@ -424,7 +424,7 @@ static void copy_pack_data(struct hashfile *f,
unsigned long avail;
while (len) {
- in = use_pack(p, w_curs, offset, &avail);
+ in = use_pack(the_repository, p, w_curs, offset, &avail);
if (avail > len)
avail = (unsigned long)len;
hashwrite(f, in, avail);
@@ -2071,7 +2071,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
enum object_type type;
unsigned long in_pack_size;
- buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
+ buf = use_pack(the_repository, p, &w_curs, entry->in_pack_offset, &avail);
/*
* We want in_pack_type even if we do not reuse delta
@@ -2105,7 +2105,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
case OBJ_REF_DELTA:
if (reuse_delta && !entry->preferred_base) {
oidread(&base_ref,
- use_pack(p, &w_curs,
+ use_pack(the_repository, p, &w_curs,
entry->in_pack_offset + used,
NULL),
the_repository->hash_algo);
@@ -2114,7 +2114,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
entry->in_pack_header_size = used + the_hash_algo->rawsz;
break;
case OBJ_OFS_DELTA:
- buf = use_pack(p, &w_curs,
+ buf = use_pack(the_repository, p, &w_curs,
entry->in_pack_offset + used, NULL);
used_0 = 0;
c = buf[used_0++];
@@ -2574,7 +2574,7 @@ unsigned long oe_get_size_slow(struct packing_data *pack,
packing_data_lock(&to_pack);
w_curs = NULL;
- buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
+ buf = use_pack(the_repository, p, &w_curs, e->in_pack_offset, &avail);
used = unpack_object_header_buffer(buf, avail, &type, &size);
if (used == 0)
die(_("unable to parse object header of %s"),
diff --git a/pack-check.c b/pack-check.c
index e883dae3f2..e4636e9897 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -34,7 +34,7 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
do {
unsigned long avail;
- void *data = use_pack(p, w_curs, offset, &avail);
+ void *data = use_pack(the_repository, p, w_curs, offset, &avail);
if (avail > len)
avail = len;
data_crc = crc32(data_crc, data, avail);
@@ -70,7 +70,7 @@ static int verify_packfile(struct repository *r,
r->hash_algo->init_fn(&ctx);
do {
unsigned long remaining;
- unsigned char *in = use_pack(p, w_curs, offset, &remaining);
+ unsigned char *in = use_pack(the_repository, p, w_curs, offset, &remaining);
offset += remaining;
if (!pack_sig_ofs)
pack_sig_ofs = p->pack_size - r->hash_algo->rawsz;
@@ -79,7 +79,7 @@ static int verify_packfile(struct repository *r,
r->hash_algo->update_fn(&ctx, in, remaining);
} while (offset < pack_sig_ofs);
r->hash_algo->final_fn(hash, &ctx);
- pack_sig = use_pack(p, w_curs, pack_sig_ofs, NULL);
+ pack_sig = use_pack(the_repository, p, w_curs, pack_sig_ofs, NULL);
if (!hasheq(hash, pack_sig, the_repository->hash_algo))
err = error("%s pack checksum mismatch",
p->pack_name);
diff --git a/packfile.c b/packfile.c
index e4569ea29d..b0a3bfcd72 100644
--- a/packfile.c
+++ b/packfile.c
@@ -273,14 +273,14 @@ static void scan_windows(struct packed_git *p,
}
}
-static int unuse_one_window(struct packed_git *current)
+static int unuse_one_window(struct repository *repo, struct packed_git *current)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *lru_w = NULL, *lru_l = NULL;
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -625,10 +625,9 @@ static int in_window(struct pack_window *win, off_t offset)
&& (offset + the_hash_algo->rawsz) <= (win_off + win->len);
}
-unsigned char *use_pack(struct packed_git *p,
- struct pack_window **w_cursor,
- off_t offset,
- unsigned long *left)
+unsigned char *use_pack(struct repository *repo, struct packed_git *p,
+ struct pack_window **w_cursor,
+ off_t offset, unsigned long *left)
{
struct pack_window *win = *w_cursor;
@@ -666,7 +665,7 @@ unsigned char *use_pack(struct packed_git *p,
win->len = (size_t)len;
pack_mapped += win->len;
while (packed_git_limit < pack_mapped
- && unuse_one_window(p))
+ && unuse_one_window(repo, p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
PROT_READ, MAP_PRIVATE,
@@ -1129,7 +1128,7 @@ unsigned long get_size_from_delta(struct packed_git *p,
git_inflate_init(&stream);
do {
- in = use_pack(p, w_curs, curpos, &stream.avail_in);
+ in = use_pack(the_repository, p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
/*
* Note: the window section returned by use_pack() must be
@@ -1185,7 +1184,7 @@ int unpack_object_header(struct packed_git *p,
* the maximum deflated object size is 2^137, which is just
* insane, so we know won't exceed what we have been given.
*/
- base = use_pack(p, w_curs, *curpos, &left);
+ base = use_pack(the_repository, p, w_curs, *curpos, &left);
used = unpack_object_header_buffer(base, left, &type, sizep);
if (!used) {
type = OBJ_BAD;
@@ -1217,7 +1216,7 @@ off_t get_delta_base(struct packed_git *p,
enum object_type type,
off_t delta_obj_offset)
{
- unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
+ unsigned char *base_info = use_pack(the_repository, p, w_curs, *curpos, NULL);
off_t base_offset;
/* use_pack() assured us we have [base_info, base_info + 20)
@@ -1264,7 +1263,8 @@ static int get_delta_base_oid(struct packed_git *p,
off_t delta_obj_offset)
{
if (type == OBJ_REF_DELTA) {
- unsigned char *base = use_pack(p, w_curs, curpos, NULL);
+ unsigned char *base = use_pack(the_repository, p, w_curs,
+ curpos, NULL);
oidread(oid, base, the_repository->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
@@ -1636,7 +1636,8 @@ static void *unpack_compressed_entry(struct packed_git *p,
git_inflate_init(&stream);
do {
- in = use_pack(p, w_curs, curpos, &stream.avail_in);
+ in = use_pack(the_repository, p, w_curs, curpos,
+ &stream.avail_in);
stream.next_in = in;
/*
* Note: we must ensure the window section returned by
diff --git a/packfile.h b/packfile.h
index 507ac602b5..90a1f2e1cf 100644
--- a/packfile.h
+++ b/packfile.h
@@ -110,7 +110,8 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
struct raw_object_store;
-unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
+unsigned char *use_pack(struct repository *repo, struct packed_git *,
+ struct pack_window **, off_t, unsigned long *);
void close_pack_windows(struct packed_git *);
void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
diff --git a/streaming.c b/streaming.c
index 38839511af..58b3b3cff7 100644
--- a/streaming.c
+++ b/streaming.c
@@ -292,7 +292,7 @@ static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
struct pack_window *window = NULL;
unsigned char *mapped;
- mapped = use_pack(st->u.in_pack.pack, &window,
+ mapped = use_pack(the_repository, st->u.in_pack.pack, &window,
st->u.in_pack.pos, &st->z.avail_in);
st->z.next_out = (unsigned char *)buf + total_read;
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH 02/20] packfile: pass down repository to `unuse_one_window`
2024-10-21 9:57 ` [PATCH 02/20] packfile: pass down repository to `unuse_one_window` Karthik Nayak
@ 2024-10-21 21:08 ` Taylor Blau
0 siblings, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-21 21:08 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 21, 2024 at 11:57:45AM +0200, Karthik Nayak wrote:
> The function `unuse_one_window` currently relies on the global variable
> `the_repository`. To eliminate global variable usage in `packfile.c`, we
> should progressively shift the dependency on the_repository to higher
> layers. Let's remove its usage from this function and any related ones.
>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> builtin/pack-objects.c | 12 ++++++------
> pack-check.c | 6 +++---
> packfile.c | 25 +++++++++++++------------
> packfile.h | 3 ++-
> streaming.c | 2 +-
> 5 files changed, 25 insertions(+), 23 deletions(-)
All looks correct, as unuse_one_window() already uses the_repository, so
I don't think any behavior is changed here...
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH 03/20] packfile: pass down repository to `close_one_pack`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-10-21 9:57 ` [PATCH 01/20] packfile: pass down repository to `odb_pack_name` Karthik Nayak
2024-10-21 9:57 ` [PATCH 02/20] packfile: pass down repository to `unuse_one_window` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 04/20] packfile: pass down repository to `add_packed_git` Karthik Nayak
` (26 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `close_one_pack` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers. Let's remove its usage from this function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/pack-objects.c | 6 +++---
midx.c | 2 +-
pack-bitmap.c | 4 ++--
pack-check.c | 2 +-
packfile.c | 24 ++++++++++++------------
packfile.h | 2 +-
6 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4dd6ada184..ec321da8dc 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1482,7 +1482,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
if (incremental)
return 0;
- if (!is_pack_valid(p))
+ if (!is_pack_valid(the_repository, p))
return -1;
/*
@@ -1560,7 +1560,7 @@ static int want_object_in_pack_one(struct packed_git *p,
if (offset) {
if (!*found_pack) {
- if (!is_pack_valid(p))
+ if (!is_pack_valid(the_repository, p))
return -1;
*found_offset = offset;
*found_pack = p;
@@ -3513,7 +3513,7 @@ static void read_packs_list_from_stdin(void)
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
- if (!is_pack_valid(p))
+ if (!is_pack_valid(the_repository, p))
die(_("packfile %s cannot be accessed"), p->pack_name);
}
diff --git a/midx.c b/midx.c
index 67e0d64004..4a05f74606 100644
--- a/midx.c
+++ b/midx.c
@@ -597,7 +597,7 @@ int fill_midx_entry(struct repository *r,
* answer, as it may have been deleted since the MIDX was
* loaded!
*/
- if (!is_pack_valid(p))
+ if (!is_pack_valid(r, p))
return 0;
if (oidset_size(&p->bad_objects) &&
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 32b222a7af..067d1741d2 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -451,7 +451,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
preferred = bitmap_git->midx->packs[preferred_pack];
- if (!is_pack_valid(preferred)) {
+ if (!is_pack_valid(the_repository, preferred)) {
warning(_("preferred pack (%s) is invalid"),
preferred->pack_name);
goto cleanup;
@@ -498,7 +498,7 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- if (!is_pack_valid(packfile)) {
+ if (!is_pack_valid(the_repository, packfile)) {
close(fd);
return -1;
}
diff --git a/pack-check.c b/pack-check.c
index e4636e9897..bb649edbc1 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -64,7 +64,7 @@ static int verify_packfile(struct repository *r,
int err = 0;
struct idx_entry *entries;
- if (!is_pack_valid(p))
+ if (!is_pack_valid(the_repository, p))
return error("packfile %s cannot be accessed", p->pack_name);
r->hash_algo->init_fn(&ctx);
diff --git a/packfile.c b/packfile.c
index b0a3bfcd72..4588004223 100644
--- a/packfile.c
+++ b/packfile.c
@@ -462,13 +462,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *repo)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = repo->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -535,7 +535,7 @@ const char *pack_basename(struct packed_git *p)
* Do not call this directly as this leaks p->pack_fd on error return;
* call open_packed_git() instead.
*/
-static int open_packed_git_1(struct packed_git *p)
+static int open_packed_git_1(struct repository *repo, struct packed_git *p)
{
struct stat st;
struct pack_header hdr;
@@ -557,7 +557,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -599,14 +599,14 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
-static int open_packed_git(struct packed_git *p)
+static int open_packed_git(struct repository *repo, struct packed_git *p)
{
- if (!open_packed_git_1(p))
+ if (!open_packed_git_1(repo, p))
return 0;
close_pack_fd(p);
return -1;
@@ -636,7 +636,7 @@ unsigned char *use_pack(struct repository *repo, struct packed_git *p,
* hash, and the in_window function above wouldn't match
* don't allow an offset too close to the end of the file.
*/
- if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
+ if (!p->pack_size && p->pack_fd == -1 && open_packed_git(repo, p))
die("packfile %s cannot be accessed", p->pack_name);
if (offset > (p->pack_size - the_hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
@@ -654,7 +654,7 @@ unsigned char *use_pack(struct repository *repo, struct packed_git *p,
size_t window_align = packed_git_window_size / 2;
off_t len;
- if (p->pack_fd == -1 && open_packed_git(p))
+ if (p->pack_fd == -1 && open_packed_git(repo, p))
die("packfile %s cannot be accessed", p->pack_name);
CALLOC_ARRAY(win, 1);
@@ -1994,7 +1994,7 @@ off_t find_pack_entry_one(const unsigned char *sha1,
return 0;
}
-int is_pack_valid(struct packed_git *p)
+int is_pack_valid(struct repository *repo, struct packed_git *p)
{
/* An already open pack is known to be valid. */
if (p->pack_fd != -1)
@@ -2012,7 +2012,7 @@ int is_pack_valid(struct packed_git *p)
}
/* Force the pack to open to prove its valid. */
- return !open_packed_git(p);
+ return !open_packed_git(repo, p);
}
struct packed_git *find_sha1_pack(const unsigned char *sha1,
@@ -2049,7 +2049,7 @@ static int fill_pack_entry(const struct object_id *oid,
* answer, as it may have been deleted since the index was
* loaded!
*/
- if (!is_pack_valid(p))
+ if (!is_pack_valid(the_repository, p))
return 0;
e->offset = offset;
e->p = p;
diff --git a/packfile.h b/packfile.h
index 90a1f2e1cf..b74d649c23 100644
--- a/packfile.h
+++ b/packfile.h
@@ -163,7 +163,7 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
*/
off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *);
-int is_pack_valid(struct packed_git *);
+int is_pack_valid(struct repository *repo, struct packed_git *);
void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 04/20] packfile: pass down repository to `add_packed_git`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (2 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 03/20] packfile: pass down repository to `close_one_pack` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 05/20] packfile: pass down repository to `unpack_object_header` Karthik Nayak
` (25 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `add_packed_git` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
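layers. Let's remove its usage from this function and any related ones.
While at it, also pass the repository down to `get_size_from_delta`,
which needs it for its call to `use_pack`.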
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 2 +-
builtin/index-pack.c | 6 ++++--
builtin/pack-objects.c | 2 +-
commit-graph.c | 2 +-
connected.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
packfile.c | 21 +++++++++++----------
packfile.h | 6 ++++--
9 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 7ad950627c..51d1cc0deb 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -888,7 +888,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(the_repository, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 97afc69625..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ec321da8dc..26e3090c85 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2174,7 +2174,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
* object size from the delta header.
*/
delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
- canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
+ canonical_size = get_size_from_delta(the_repository, p, &w_curs, delta_pos);
if (canonical_size == 0)
goto give_up;
SET_SIZE(entry, canonical_size);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..1c333a9c52 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(the_repository, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index 87cc4b57a1..235890efd0 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf, idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index 4a05f74606..94609456a2 100644
--- a/midx.c
+++ b/midx.c
@@ -455,7 +455,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir,
m->pack_names[pack_int_id]);
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
strbuf_release(&pack_name);
if (!p)
diff --git a/packfile.c b/packfile.c
index 4588004223..f300119bb1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -706,7 +706,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *repo, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -751,9 +752,9 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < repo->hash_algo->hexsz ||
+ get_hash_hex(path + path_len - repo->hash_algo->hexsz, p->hash))
+ hashclr(p->hash, repo->hash_algo);
return p;
}
@@ -880,7 +881,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len,
+ data->local);
if (p)
install_packed_git(data->r, p);
}
@@ -1113,9 +1115,8 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
return used;
}
-unsigned long get_size_from_delta(struct packed_git *p,
- struct pack_window **w_curs,
- off_t curpos)
+unsigned long get_size_from_delta(struct repository *repo, struct packed_git *p,
+ struct pack_window **w_curs, off_t curpos)
{
const unsigned char *data;
unsigned char delta_head[20], *in;
@@ -1128,7 +1129,7 @@ unsigned long get_size_from_delta(struct packed_git *p,
git_inflate_init(&stream);
do {
- in = use_pack(the_repository, p, w_curs, curpos, &stream.avail_in);
+ in = use_pack(repo, p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
/*
* Note: the window section returned by use_pack() must be
@@ -1559,7 +1560,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
type = OBJ_BAD;
goto out;
}
- *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
+ *oi->sizep = get_size_from_delta(r, p, &w_curs, tmp_pos);
if (*oi->sizep == 0) {
type = OBJ_BAD;
goto out;
diff --git a/packfile.h b/packfile.h
index b74d649c23..22d053a3af 100644
--- a/packfile.h
+++ b/packfile.h
@@ -117,7 +117,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *repo, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
@@ -166,7 +167,8 @@ off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *);
int is_pack_valid(struct repository *repo, struct packed_git *);
void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
-unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
+unsigned long get_size_from_delta(struct repository *repo, struct packed_git *,
+ struct pack_window **, off_t);
int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
off_t *curpos, enum object_type type,
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 05/20] packfile: pass down repository to `unpack_object_header`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (3 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 04/20] packfile: pass down repository to `add_packed_git` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 06/20] packfile: pass down repository to `get_delta_base` Karthik Nayak
` (24 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `unpack_object_header` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/pack-objects.c | 3 ++-
pack-bitmap.c | 3 ++-
pack-check.c | 3 ++-
packfile.c | 13 ++++++-------
packfile.h | 3 ++-
streaming.c | 4 ++--
6 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 26e3090c85..3893135b59 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1033,7 +1033,8 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
offset - (hashfile_total(out) - pack_start));
cur = offset;
- type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
+ type = unpack_object_header(the_repository, reuse_packfile, w_curs,
+ &cur, &size);
assert(type >= 0);
if (type == OBJ_OFS_DELTA) {
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 067d1741d2..96c91a080e 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -2067,7 +2067,8 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
return -1; /* not actually in the pack */
delta_obj_offset = offset;
- type = unpack_object_header(pack->p, w_curs, &offset, &size);
+ type = unpack_object_header(the_repository, pack->p, w_curs, &offset,
+ &size);
if (type < 0)
return -1; /* broken packfile, punt */
diff --git a/pack-check.c b/pack-check.c
index bb649edbc1..e2c3b264e7 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -127,7 +127,8 @@ static int verify_packfile(struct repository *r,
}
curpos = entries[i].offset;
- type = unpack_object_header(p, w_curs, &curpos, &size);
+ type = unpack_object_header(the_repository, p, w_curs, &curpos,
+ &size);
unuse_pack(w_curs);
if (type == OBJ_BLOB && big_file_threshold <= size) {
diff --git a/packfile.c b/packfile.c
index f300119bb1..7a0d1957e9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1169,9 +1169,8 @@ unsigned long get_size_from_delta(struct repository *repo, struct packed_git *p,
return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
}
-int unpack_object_header(struct packed_git *p,
- struct pack_window **w_curs,
- off_t *curpos,
+int unpack_object_header(struct repository *r, struct packed_git *p,
+ struct pack_window **w_curs, off_t *curpos,
unsigned long *sizep)
{
unsigned char *base;
@@ -1185,7 +1184,7 @@ int unpack_object_header(struct packed_git *p,
* the maximum deflated object size is 2^137, which is just
* insane, so we know won't exceed what we have been given.
*/
- base = use_pack(the_repository, p, w_curs, *curpos, &left);
+ base = use_pack(r, p, w_curs, *curpos, &left);
used = unpack_object_header_buffer(base, left, &type, sizep);
if (!used) {
type = OBJ_BAD;
@@ -1332,7 +1331,7 @@ static enum object_type packed_to_object_type(struct repository *r,
if (!base_offset)
goto unwind;
curpos = obj_offset = base_offset;
- type = unpack_object_header(p, w_curs, &curpos, &size);
+ type = unpack_object_header(r, p, w_curs, &curpos, &size);
if (type <= OBJ_NONE) {
/* If getting the base itself fails, we first
* retry the base, otherwise unwind */
@@ -1548,7 +1547,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
if (!*oi->contentp)
type = OBJ_BAD;
} else {
- type = unpack_object_header(p, &w_curs, &curpos, &size);
+ type = unpack_object_header(r, p, &w_curs, &curpos, &size);
}
if (!oi->contentp && oi->sizep) {
@@ -1736,7 +1735,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
}
}
- type = unpack_object_header(p, &w_curs, &curpos, &size);
+ type = unpack_object_header(r, p, &w_curs, &curpos, &size);
if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
break;
diff --git a/packfile.h b/packfile.h
index 22d053a3af..488d78ae9f 100644
--- a/packfile.h
+++ b/packfile.h
@@ -169,7 +169,8 @@ void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object
unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
unsigned long get_size_from_delta(struct repository *repo, struct packed_git *,
struct pack_window **, off_t);
-int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+int unpack_object_header(struct repository *repo, struct packed_git *,
+ struct pack_window **, off_t *, unsigned long *);
off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
off_t *curpos, enum object_type type,
off_t delta_obj_offset);
diff --git a/streaming.c b/streaming.c
index 58b3b3cff7..56154349fa 100644
--- a/streaming.c
+++ b/streaming.c
@@ -334,7 +334,7 @@ static int close_istream_pack_non_delta(struct git_istream *st)
}
static int open_istream_pack_non_delta(struct git_istream *st,
- struct repository *r UNUSED,
+ struct repository *r,
const struct object_id *oid UNUSED,
enum object_type *type UNUSED)
{
@@ -343,7 +343,7 @@ static int open_istream_pack_non_delta(struct git_istream *st,
window = NULL;
- in_pack_type = unpack_object_header(st->u.in_pack.pack,
+ in_pack_type = unpack_object_header(r, st->u.in_pack.pack,
&window,
&st->u.in_pack.pos,
&st->size);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 06/20] packfile: pass down repository to `get_delta_base`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (4 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 05/20] packfile: pass down repository to `unpack_object_header` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 07/20] packfile: use provided repository in `packed_object_info` Karthik Nayak
` (23 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `get_delta_base` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers. Let's remove its usage from this function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/pack-objects.c | 3 ++-
pack-bitmap.c | 4 ++--
packfile.c | 42 ++++++++++++++++++++----------------------
packfile.h | 6 +++---
4 files changed, 27 insertions(+), 28 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 3893135b59..a10eae239e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1044,7 +1044,8 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
unsigned char header[MAX_PACK_OBJECT_HEADER];
unsigned len;
- base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
+ base_offset = get_delta_base(the_repository, reuse_packfile,
+ w_curs, &cur, type, offset);
assert(base_offset != 0);
/* Convert to REF_DELTA if we must... */
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 96c91a080e..d959e30682 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -2085,8 +2085,8 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
* and the normal slow path will complain about it in
* more detail.
*/
- base_offset = get_delta_base(pack->p, w_curs, &offset, type,
- delta_obj_offset);
+ base_offset = get_delta_base(the_repository, pack->p, w_curs,
+ &offset, type, delta_obj_offset);
if (!base_offset)
return 0;
diff --git a/packfile.c b/packfile.c
index 7a0d1957e9..ee46898b35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1210,13 +1210,11 @@ const struct packed_git *has_packed_and_bad(struct repository *r,
return NULL;
}
-off_t get_delta_base(struct packed_git *p,
- struct pack_window **w_curs,
- off_t *curpos,
- enum object_type type,
- off_t delta_obj_offset)
+off_t get_delta_base(struct repository *repo, struct packed_git *p,
+ struct pack_window **w_curs, off_t *curpos,
+ enum object_type type, off_t delta_obj_offset)
{
- unsigned char *base_info = use_pack(the_repository, p, w_curs, *curpos, NULL);
+ unsigned char *base_info = use_pack(repo, p, w_curs, *curpos, NULL);
off_t base_offset;
/* use_pack() assured us we have [base_info, base_info + 20)
@@ -1243,7 +1241,7 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
base_offset = find_pack_entry_one(base_info, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1255,22 +1253,19 @@ off_t get_delta_base(struct packed_git *p,
* the final object lookup), but more expensive for OFS deltas (we
* have to load the revidx to convert the offset back into a sha1).
*/
-static int get_delta_base_oid(struct packed_git *p,
- struct pack_window **w_curs,
- off_t curpos,
- struct object_id *oid,
- enum object_type type,
+static int get_delta_base_oid(struct repository *repo, struct packed_git *p,
+ struct pack_window **w_curs, off_t curpos,
+ struct object_id *oid, enum object_type type,
off_t delta_obj_offset)
{
if (type == OBJ_REF_DELTA) {
- unsigned char *base = use_pack(the_repository, p, w_curs,
- curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ unsigned char *base = use_pack(repo, p, w_curs, curpos, NULL);
+ oidread(oid, base, repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
- off_t base_offset = get_delta_base(p, w_curs, &curpos,
- type, delta_obj_offset);
+ off_t base_offset = get_delta_base(repo, p, w_curs, &curpos, type,
+ delta_obj_offset);
if (!base_offset)
return -1;
@@ -1327,7 +1322,8 @@ static enum object_type packed_to_object_type(struct repository *r,
}
poi_stack[poi_stack_nr++] = obj_offset;
/* If parsing the base offset fails, just unwind */
- base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
+ base_offset = get_delta_base(r, p, w_curs, &curpos, type,
+ obj_offset);
if (!base_offset)
goto unwind;
curpos = obj_offset = base_offset;
@@ -1553,8 +1549,9 @@ int packed_object_info(struct repository *r, struct packed_git *p,
if (!oi->contentp && oi->sizep) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t tmp_pos = curpos;
- off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
- type, obj_offset);
+ off_t base_offset = get_delta_base(r, p, &w_curs,
+ &tmp_pos, type,
+ obj_offset);
if (!base_offset) {
type = OBJ_BAD;
goto out;
@@ -1600,7 +1597,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
if (oi->delta_base_oid) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
- if (get_delta_base_oid(p, &w_curs, curpos,
+ if (get_delta_base_oid(r, p, &w_curs, curpos,
oi->delta_base_oid,
type, obj_offset) < 0) {
type = OBJ_BAD;
@@ -1739,7 +1736,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
break;
- base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
+ base_offset = get_delta_base(r, p, &w_curs, &curpos, type,
+ obj_offset);
if (!base_offset) {
error("failed to validate delta base reference "
"at offset %"PRIuMAX" from %s",
diff --git a/packfile.h b/packfile.h
index 488d78ae9f..050dc516b1 100644
--- a/packfile.h
+++ b/packfile.h
@@ -171,9 +171,9 @@ unsigned long get_size_from_delta(struct repository *repo, struct packed_git *,
struct pack_window **, off_t);
int unpack_object_header(struct repository *repo, struct packed_git *,
struct pack_window **, off_t *, unsigned long *);
-off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
- off_t *curpos, enum object_type type,
- off_t delta_obj_offset);
+off_t get_delta_base(struct repository *repo, struct packed_git *p,
+ struct pack_window **w_curs, off_t *curpos,
+ enum object_type type, off_t delta_obj_offset);
void release_pack_memory(size_t);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 07/20] packfile: use provided repository in `packed_object_info`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (5 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 06/20] packfile: pass down repository to `get_delta_base` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 08/20] packfile: pass down repository to `unpack_compressed_entry` Karthik Nayak
` (22 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `packed_object_info` receives a repository struct as an
argument, yet it still uses the global `the_repository` variable
internally. Let's replace it with the provided repository argument.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/pack-objects.c | 2 +-
packfile.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index a10eae239e..c2555d4986 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3383,7 +3383,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
struct object_info oi = OBJECT_INFO_INIT;
oi.typep = &type;
- if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
+ if (packed_object_info(revs->repo, p, ofs, &oi) < 0) {
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
} else if (type == OBJ_COMMIT) {
diff --git a/packfile.c b/packfile.c
index ee46898b35..458db59b3a 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1604,7 +1604,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, r->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 08/20] packfile: pass down repository to `unpack_compressed_entry`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (6 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 07/20] packfile: use provided repository in `packed_object_info` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 09/20] packfile: pass down repository to `nth_packed_object_id` Karthik Nayak
` (21 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `unpack_compressed_entry` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/packfile.c b/packfile.c
index 458db59b3a..54f3b9f0a7 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1615,10 +1615,11 @@ int packed_object_info(struct repository *r, struct packed_git *p,
return type;
}
-static void *unpack_compressed_entry(struct packed_git *p,
- struct pack_window **w_curs,
- off_t curpos,
- unsigned long size)
+static void *unpack_compressed_entry(struct repository *repo,
+ struct packed_git *p,
+ struct pack_window **w_curs,
+ off_t curpos,
+ unsigned long size)
{
int st;
git_zstream stream;
@@ -1633,8 +1634,7 @@ static void *unpack_compressed_entry(struct packed_git *p,
git_inflate_init(&stream);
do {
- in = use_pack(the_repository, p, w_curs, curpos,
- &stream.avail_in);
+ in = use_pack(repo, p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
/*
* Note: we must ensure the window section returned by
@@ -1777,7 +1777,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
case OBJ_BLOB:
case OBJ_TAG:
if (!base_from_cache)
- data = unpack_compressed_entry(p, &w_curs, curpos, size);
+ data = unpack_compressed_entry(r, p, &w_curs, curpos,
+ size);
break;
default:
data = NULL;
@@ -1838,7 +1839,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
if (!base)
continue;
- delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
+ delta_data = unpack_compressed_entry(r, p, &w_curs, curpos,
+ delta_size);
if (!delta_data) {
error("failed to unpack compressed delta "
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 09/20] packfile: pass down repository to `nth_packed_object_id`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (7 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 08/20] packfile: pass down repository to `unpack_compressed_entry` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 10/20] packfile: pass down repository to `find_pack_entry_one` Karthik Nayak
` (20 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `nth_packed_object_id` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/pack-objects.c | 6 +++---
midx-write.c | 2 +-
object-name.c | 8 ++++----
pack-bitmap.c | 2 +-
pack-check.c | 2 +-
packfile.c | 21 ++++++++++-----------
packfile.h | 3 ++-
t/helper/test-pack-mtimes.c | 2 +-
8 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c2555d4986..adf55d892f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1059,7 +1059,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
(uintmax_t)base_offset,
reuse_packfile->pack_name);
- nth_packed_object_id(&base_oid, reuse_packfile,
+ nth_packed_object_id(the_repository, &base_oid, reuse_packfile,
pack_pos_to_index(reuse_packfile, base_pos));
len = encode_in_pack_object_header(header, sizeof(header),
@@ -2141,7 +2141,7 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
uint32_t pos;
if (offset_to_pack_pos(p, ofs, &pos) < 0)
goto give_up;
- if (!nth_packed_object_id(&base_ref, p,
+ if (!nth_packed_object_id(the_repository, &base_ref, p,
pack_pos_to_index(p, pos)))
have_base = 1;
}
@@ -4036,7 +4036,7 @@ static void loosen_unused_packed_objects(void)
die(_("cannot open pack index"));
for (i = 0; i < p->num_objects; i++) {
- nth_packed_object_id(&oid, p, i);
+ nth_packed_object_id(the_repository, &oid, p, i);
if (!packlist_find(&to_pack, &oid) &&
!has_sha1_pack_kept_or_nonlocal(&oid) &&
!loosened_object_can_be_discarded(&oid, p->mtime)) {
diff --git a/midx-write.c b/midx-write.c
index c57726ef94..4696b8326c 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -227,7 +227,7 @@ static void fill_pack_entry(uint32_t pack_int_id,
struct pack_midx_entry *entry,
int preferred)
{
- if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
+ if (nth_packed_object_id(the_repository, &entry->oid, p, cur_object) < 0)
die(_("failed to locate object %d in packfile"), cur_object);
entry->pack_int_id = pack_int_id;
diff --git a/object-name.c b/object-name.c
index c892fbe80a..43023884ef 100644
--- a/object-name.c
+++ b/object-name.c
@@ -188,7 +188,7 @@ static void unique_in_pack(struct packed_git *p,
*/
for (i = first; i < num && !ds->ambiguous; i++) {
struct object_id oid;
- nth_packed_object_id(&oid, p, i);
+ nth_packed_object_id(ds->repo, &oid, p, i);
if (!match_hash(len, ds->bin_pfx.hash, oid.hash))
break;
update_candidates(ds, &oid);
@@ -776,14 +776,14 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
*/
mad->init_len = 0;
if (!match) {
- if (!nth_packed_object_id(&oid, p, first))
+ if (!nth_packed_object_id(mad->repo, &oid, p, first))
extend_abbrev_len(&oid, mad);
} else if (first < num - 1) {
- if (!nth_packed_object_id(&oid, p, first + 1))
+ if (!nth_packed_object_id(mad->repo, &oid, p, first + 1))
extend_abbrev_len(&oid, mad);
}
if (first > 0) {
- if (!nth_packed_object_id(&oid, p, first - 1))
+ if (!nth_packed_object_id(mad->repo, &oid, p, first - 1))
extend_abbrev_len(&oid, mad);
}
mad->init_len = mad->cur_len;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d959e30682..96716c785b 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -318,7 +318,7 @@ static int nth_bitmap_object_oid(struct bitmap_index *index,
{
if (index->midx)
return nth_midxed_object_oid(oid, index->midx, n) ? 0 : -1;
- return nth_packed_object_id(oid, index->pack, n);
+ return nth_packed_object_id(the_repository, oid, index->pack, n);
}
static int load_bitmap_entries_v1(struct bitmap_index *index)
diff --git a/pack-check.c b/pack-check.c
index e2c3b264e7..a5551809c1 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -111,7 +111,7 @@ static int verify_packfile(struct repository *r,
off_t curpos;
int data_valid;
- if (nth_packed_object_id(&oid, p, entries[i].nr) < 0)
+ if (nth_packed_object_id(r, &oid, p, entries[i].nr) < 0)
BUG("unable to get oid of object %lu from %s",
(unsigned long)entries[i].nr, p->pack_name);
diff --git a/packfile.c b/packfile.c
index 54f3b9f0a7..92c919d628 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1273,7 +1273,7 @@ static int get_delta_base_oid(struct repository *repo, struct packed_git *p,
if (offset_to_pack_pos(p, base_offset, &base_pos) < 0)
return -1;
- return nth_packed_object_id(oid, p,
+ return nth_packed_object_id(repo, oid, p,
pack_pos_to_index(p, base_pos));
} else
return -1;
@@ -1288,7 +1288,7 @@ static int retry_bad_packed_offset(struct repository *r,
struct object_id oid;
if (offset_to_pack_pos(p, obj_offset, &pos) < 0)
return OBJ_BAD;
- nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos));
+ nth_packed_object_id(r, &oid, p, pack_pos_to_index(p, pos));
mark_bad_packed_object(p, &oid);
type = oid_object_info(r, &oid, NULL);
if (type <= OBJ_NONE)
@@ -1723,7 +1723,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
index_pos = pack_pos_to_index(p, pack_pos);
if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) {
struct object_id oid;
- nth_packed_object_id(&oid, p, index_pos);
+ nth_packed_object_id(r, &oid, p, index_pos);
error("bad packed object CRC for %s",
oid_to_hex(&oid));
mark_bad_packed_object(p, &oid);
@@ -1813,7 +1813,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
struct object_info oi = OBJECT_INFO_INIT;
- nth_packed_object_id(&base_oid, p,
+ nth_packed_object_id(r, &base_oid, p,
pack_pos_to_index(p, pos));
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
@@ -1917,12 +1917,11 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
index_lookup, index_lookup_width, result);
}
-int nth_packed_object_id(struct object_id *oid,
- struct packed_git *p,
- uint32_t n)
+int nth_packed_object_id(struct repository *repo, struct object_id *oid,
+ struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1932,11 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ repo->hash_algo);
} else {
index += 8;
oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ repo->hash_algo);
}
return 0;
}
@@ -2194,7 +2193,7 @@ int for_each_object_in_pack(struct packed_git *p,
else
index_pos = i;
- if (nth_packed_object_id(&oid, p, index_pos) < 0)
+ if (nth_packed_object_id(the_repository, &oid, p, index_pos) < 0)
return error("unable to get sha1 of object %u in %s",
index_pos, p->pack_name);
diff --git a/packfile.h b/packfile.h
index 050dc516b1..f744af6e9b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -150,7 +150,8 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
* parameter. Open the index if it is not already open. Returns 0 on success,
* negative otherwise.
*/
-int nth_packed_object_id(struct object_id *, struct packed_git *, uint32_t n);
+int nth_packed_object_id(struct repository *repo, struct object_id *,
+ struct packed_git *, uint32_t n);
/*
* Return the offset of the nth object within the specified packfile.
diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c
index f8f9afbb5b..ebd980b308 100644
--- a/t/helper/test-pack-mtimes.c
+++ b/t/helper/test-pack-mtimes.c
@@ -16,7 +16,7 @@ static void dump_mtimes(struct packed_git *p)
for (i = 0; i < p->num_objects; i++) {
struct object_id oid;
- if (nth_packed_object_id(&oid, p, i) < 0)
+ if (nth_packed_object_id(the_repository, &oid, p, i) < 0)
die("could not load object id at position %"PRIu32, i);
printf("%s %"PRIu32"\n",
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 10/20] packfile: pass down repository to `find_pack_entry_one`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (8 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 09/20] packfile: pass down repository to `nth_packed_object_id` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 11/20] packfile: pass down repository to `fill_pack_entry` Karthik Nayak
` (19 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `find_pack_entry_one` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
`the_repository` to higher layers. Let's remove its usage from this
function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
builtin/pack-objects.c | 4 ++--
connected.c | 5 +++--
http-push.c | 5 +++--
http-walker.c | 2 +-
midx.c | 2 +-
pack-bitmap.c | 6 ++++--
packfile.c | 15 ++++++++-------
packfile.h | 6 ++++--
t/helper/test-find-pack.c | 2 +-
10 files changed, 29 insertions(+), 22 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 51d1cc0deb..a6743db85c 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -966,7 +966,7 @@ static int store_object(
if (e->idx.offset) {
duplicate_count_by_type[type]++;
return 1;
- } else if (find_sha1_pack(oid.hash,
+ } else if (find_sha1_pack(the_repository, oid.hash,
get_all_packs(the_repository))) {
e->type = type;
e->pack_id = MAX_PACK_ID;
@@ -1167,7 +1167,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
duplicate_count_by_type[OBJ_BLOB]++;
truncate_pack(&checkpoint);
- } else if (find_sha1_pack(oid.hash,
+ } else if (find_sha1_pack(the_repository, oid.hash,
get_all_packs(the_repository))) {
e->type = OBJ_BLOB;
e->pack_id = MAX_PACK_ID;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index adf55d892f..d41259a423 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1558,7 +1558,7 @@ static int want_object_in_pack_one(struct packed_git *p,
if (p == *found_pack)
offset = *found_offset;
else
- offset = find_pack_entry_one(oid->hash, p);
+ offset = find_pack_entry_one(the_repository, oid->hash, p);
if (offset) {
if (!*found_pack) {
@@ -3986,7 +3986,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
while (p) {
if ((!p->pack_local || p->pack_keep ||
p->pack_keep_in_core) &&
- find_pack_entry_one(oid->hash, p)) {
+ find_pack_entry_one(the_repository, oid->hash, p)) {
last_found = p;
return 1;
}
diff --git a/connected.c b/connected.c
index 235890efd0..00b7de34c6 100644
--- a/connected.c
+++ b/connected.c
@@ -78,7 +78,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
for (p = get_all_packs(the_repository); p; p = p->next) {
if (!p->pack_promisor)
continue;
- if (find_pack_entry_one(oid->hash, p))
+ if (find_pack_entry_one(the_repository, oid->hash, p))
goto promisor_pack_found;
}
/*
@@ -144,7 +144,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
* are sure the ref is good and not sending it to
* rev-list for verification.
*/
- if (new_pack && find_pack_entry_one(oid->hash, new_pack))
+ if (new_pack && find_pack_entry_one(the_repository, oid->hash,
+ new_pack))
continue;
if (fprintf(rev_list_in, "%s\n", oid_to_hex(oid)) < 0)
diff --git a/http-push.c b/http-push.c
index aad89f2eab..cb6cf1696e 100644
--- a/http-push.c
+++ b/http-push.c
@@ -309,7 +309,8 @@ static void start_fetch_packed(struct transfer_request *request)
struct transfer_request *check_request = request_queue_head;
struct http_pack_request *preq;
- target = find_sha1_pack(request->obj->oid.hash, repo->packs);
+ target = find_sha1_pack(the_repository, request->obj->oid.hash,
+ repo->packs);
if (!target) {
fprintf(stderr, "Unable to fetch %s, will not be able to update server info refs\n", oid_to_hex(&request->obj->oid));
repo->can_update_info_refs = 0;
@@ -681,7 +682,7 @@ static int add_send_request(struct object *obj, struct remote_lock *lock)
get_remote_object_list(obj->oid.hash[0]);
if (obj->flags & (REMOTE | PUSHING))
return 0;
- target = find_sha1_pack(obj->oid.hash, repo->packs);
+ target = find_sha1_pack(the_repository, obj->oid.hash, repo->packs);
if (target) {
obj->flags |= REMOTE;
return 0;
diff --git a/http-walker.c b/http-walker.c
index fb2d86d5e7..0a11ed6ecf 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -431,7 +431,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne
if (fetch_indices(walker, repo))
return -1;
- target = find_sha1_pack(sha1, repo->packs);
+ target = find_sha1_pack(the_repository, sha1, repo->packs);
if (!target)
return -1;
close_pack_index(target);
diff --git a/midx.c b/midx.c
index 94609456a2..c76df95d6d 100644
--- a/midx.c
+++ b/midx.c
@@ -973,7 +973,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag
}
m_offset = e.offset;
- p_offset = find_pack_entry_one(oid.hash, e.p);
+ p_offset = find_pack_entry_one(r, oid.hash, e.p);
if (m_offset != p_offset)
midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 96716c785b..b699875555 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -935,7 +935,8 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
const struct object_id *oid)
{
uint32_t pos;
- off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack);
+ off_t offset = find_pack_entry_one(the_repository, oid->hash,
+ bitmap_git->pack);
if (!offset)
return -1;
@@ -1609,7 +1610,8 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
if (bsearch_midx(&object->oid, bitmap_git->midx, NULL))
return 1;
} else {
- if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0)
+ if (find_pack_entry_one(the_repository, object->oid.hash,
+ bitmap_git->pack) > 0)
return 1;
}
}
diff --git a/packfile.c b/packfile.c
index 92c919d628..bf70fd60a8 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1240,7 +1240,7 @@ off_t get_delta_base(struct repository *repo, struct packed_git *p,
*curpos += used;
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
- base_offset = find_pack_entry_one(base_info, p);
+ base_offset = find_pack_entry_one(repo, base_info, p);
*curpos += repo->hash_algo->rawsz;
} else
die("I am totally screwed");
@@ -1975,8 +1975,8 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
}
}
-off_t find_pack_entry_one(const unsigned char *sha1,
- struct packed_git *p)
+off_t find_pack_entry_one(struct repository *repo, const unsigned char *sha1,
+ struct packed_git *p)
{
const unsigned char *index = p->index_data;
struct object_id oid;
@@ -1987,7 +1987,7 @@ off_t find_pack_entry_one(const unsigned char *sha1,
return 0;
}
- hashcpy(oid.hash, sha1, the_repository->hash_algo);
+ hashcpy(oid.hash, sha1, repo->hash_algo);
if (bsearch_pack(&oid, p, &result))
return nth_packed_object_offset(p, result);
return 0;
@@ -2014,13 +2014,14 @@ int is_pack_valid(struct repository *repo, struct packed_git *p)
return !open_packed_git(repo, p);
}
-struct packed_git *find_sha1_pack(const unsigned char *sha1,
+struct packed_git *find_sha1_pack(struct repository *repo,
+ const unsigned char *sha1,
struct packed_git *packs)
{
struct packed_git *p;
for (p = packs; p; p = p->next) {
- if (find_pack_entry_one(sha1, p))
+ if (find_pack_entry_one(repo, sha1, p))
return p;
}
return NULL;
@@ -2037,7 +2038,7 @@ static int fill_pack_entry(const struct object_id *oid,
oidset_contains(&p->bad_objects, oid))
return 0;
- offset = find_pack_entry_one(oid->hash, p);
+ offset = find_pack_entry_one(the_repository, oid->hash, p);
if (!offset)
return 0;
diff --git a/packfile.h b/packfile.h
index f744af6e9b..983d6df385 100644
--- a/packfile.h
+++ b/packfile.h
@@ -87,7 +87,8 @@ struct packed_git *get_all_packs(struct repository *r);
*/
unsigned long repo_approximate_object_count(struct repository *r);
-struct packed_git *find_sha1_pack(const unsigned char *sha1,
+struct packed_git *find_sha1_pack(struct repository *repo,
+ const unsigned char *sha1,
struct packed_git *packs);
void pack_report(void);
@@ -163,7 +164,8 @@ off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
* If the object named sha1 is present in the specified packfile,
* return its offset within the packfile; otherwise, return 0.
*/
-off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *);
+off_t find_pack_entry_one(struct repository *repo, const unsigned char *sha1,
+ struct packed_git *);
int is_pack_valid(struct repository *repo, struct packed_git *);
void *unpack_entry(struct repository *r, struct packed_git *, off_t, enum object_type *, unsigned long *);
diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c
index 14b2b0c12c..c5cdea98f6 100644
--- a/t/helper/test-find-pack.c
+++ b/t/helper/test-find-pack.c
@@ -40,7 +40,7 @@ int cmd__find_pack(int argc, const char **argv)
die("cannot parse %s as an object name", argv[0]);
for (p = get_all_packs(the_repository); p; p = p->next)
- if (find_pack_entry_one(oid.hash, p)) {
+ if (find_pack_entry_one(the_repository, oid.hash, p)) {
printf("%s\n", p->pack_name);
actual_count++;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 11/20] packfile: pass down repository to `fill_pack_entry`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (9 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 10/20] packfile: pass down repository to `find_pack_entry_one` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 12/20] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (18 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `fill_pack_entry` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on `the_repository` to higher
layers. Let's remove its usage from this function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index bf70fd60a8..236c5c0479 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2028,9 +2028,8 @@ struct packed_git *find_sha1_pack(struct repository *repo,
}
-static int fill_pack_entry(const struct object_id *oid,
- struct pack_entry *e,
- struct packed_git *p)
+static int fill_pack_entry(struct repository *repo, const struct object_id *oid,
+ struct pack_entry *e, struct packed_git *p)
{
off_t offset;
@@ -2038,7 +2037,7 @@ static int fill_pack_entry(const struct object_id *oid,
oidset_contains(&p->bad_objects, oid))
return 0;
- offset = find_pack_entry_one(the_repository, oid->hash, p);
+ offset = find_pack_entry_one(repo, oid->hash, p);
if (!offset)
return 0;
@@ -2049,7 +2048,7 @@ static int fill_pack_entry(const struct object_id *oid,
* answer, as it may have been deleted since the index was
* loaded!
*/
- if (!is_pack_valid(the_repository, p))
+ if (!is_pack_valid(repo, p))
return 0;
e->offset = offset;
e->p = p;
@@ -2072,7 +2071,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
list_for_each(pos, &r->objects->packed_git_mru) {
struct packed_git *p = list_entry(pos, struct packed_git, mru);
- if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) {
+ if (!p->multi_pack_index && fill_pack_entry(r, oid, e, p)) {
list_move(&p->mru, &r->objects->packed_git_mru);
return 1;
}
@@ -2134,7 +2133,7 @@ int find_kept_pack_entry(struct repository *r,
for (cache = kept_pack_cache(r, flags); *cache; cache++) {
struct packed_git *p = *cache;
- if (fill_pack_entry(oid, e, p))
+ if (fill_pack_entry(r, oid, e, p))
return 1;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 12/20] packfile: pass down repository to `has_object[_kept]_pack`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (10 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 11/20] packfile: pass down repository to `fill_pack_entry` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 13/20] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (17 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
`the_repository` to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d41259a423..321e77ebf3 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1531,7 +1531,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(the_repository, oid, flags))
return 0;
}
@@ -3629,7 +3629,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(the_repository, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..1d483bdf37 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(the_repository, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index b699875555..97237acb24 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1891,7 +1891,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 236c5c0479..6bc7b6e9eb 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2140,16 +2140,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *repo, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(repo, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(repo, oid, flags, &e);
}
int has_pack_index(struct repository *repo, const unsigned char *sha1)
diff --git a/packfile.h b/packfile.h
index 983d6df385..ec4aff63b4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -200,8 +200,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *repo, const struct object_id *oid);
+int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
+ unsigned flags);
int has_pack_index(struct repository *repo, const unsigned char *sha1);
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 13/20] packfile: pass down repository to `for_each_packed_object`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (11 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 12/20] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 14/20] packfile: pass down repository to `is_promisor_object` Karthik Nayak
` (16 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
`the_repository` to higher layers. Let's remove its usage from this
function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 9 +++++++--
builtin/fsck.c | 10 ++++++++--
builtin/pack-objects.c | 8 ++++----
builtin/repack.c | 2 +-
commit-graph.c | 6 +++---
object-store-ll.h | 7 ++++---
packfile.c | 20 +++++++++++---------
reachable.c | 2 +-
revision.c | 2 +-
9 files changed, 40 insertions(+), 26 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..f6afe67bef 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,7 +827,9 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
+ for_each_packed_object(the_repository,
+ batch_unordered_packed,
+ &cb,
FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
@@ -835,7 +837,10 @@ static int batch_objects(struct batch_options *opt)
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository,
+ collect_packed_object,
+ &sa,
+ 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..ccf6a8eab2 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -966,7 +969,10 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity,
+ NULL,
+ 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 321e77ebf3..4b91dc0add 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3540,9 +3540,8 @@ static void read_packs_list_from_stdin(void)
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
- for_each_object_in_pack(p,
- add_object_entry_from_pack,
- &revs,
+ for_each_object_in_pack(the_repository, p,
+ add_object_entry_from_pack, &revs,
FOR_EACH_OBJECT_PACK_ORDER);
}
@@ -3929,7 +3928,8 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(the_repository,
+ add_object_in_unpacked_pack, NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/commit-graph.c b/commit-graph.c
index 1c333a9c52..8c72c3ac10 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1923,8 +1923,8 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
ret = error(_("error opening index for %s"), packname.buf);
goto cleanup;
}
- for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_object_in_pack(the_repository, p, add_packed_commits,
+ ctx, FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(the_repository, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..710130cd06 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -542,10 +542,11 @@ typedef int each_packed_object_fn(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data);
-int for_each_object_in_pack(struct packed_git *p,
+int for_each_object_in_pack(struct repository *repo,
+ struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn,
+ void *, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index 6bc7b6e9eb..aea8e9f429 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2161,7 +2161,8 @@ int has_pack_index(struct repository *repo, const unsigned char *sha1)
return 1;
}
-int for_each_object_in_pack(struct packed_git *p,
+int for_each_object_in_pack(struct repository *repo,
+ struct packed_git *p,
each_packed_object_fn cb, void *data,
enum for_each_object_flags flags)
{
@@ -2169,7 +2170,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(repo, p))
return -1;
}
@@ -2194,7 +2195,7 @@ int for_each_object_in_pack(struct packed_git *p,
else
index_pos = i;
- if (nth_packed_object_id(the_repository, &oid, p, index_pos) < 0)
+ if (nth_packed_object_id(repo, &oid, p, index_pos) < 0)
return error("unable to get sha1 of object %u in %s",
index_pos, p->pack_name);
@@ -2205,15 +2206,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2229,7 +2230,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
pack_errors = 1;
continue;
}
- r = for_each_object_in_pack(p, cb, data, flags);
+ r = for_each_object_in_pack(repo, p, cb, data, flags);
if (r)
break;
}
@@ -2298,7 +2299,8 @@ int is_promisor_object(const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ for_each_packed_object(the_repository,
+ add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..d7913d7608 100644
--- a/revision.c
+++ b/revision.c
@@ -3915,7 +3915,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 14/20] packfile: pass down repository to `is_promisor_object`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (12 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 13/20] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 15/20] object-store: pass down repository to `each_packed_object_fn` Karthik Nayak
` (15 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `is_promisor_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fsck.c | 10 +++++-----
builtin/pack-objects.c | 3 ++-
builtin/rev-list.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
packfile.c | 6 +++---
packfile.h | 2 +-
promisor-remote.c | 2 +-
revision.c | 6 +++---
tag.c | 2 +-
10 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index ccf6a8eab2..9c4e0622b5 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -491,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -534,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -1017,7 +1017,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4b91dc0add..16e7f5d4ec 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3859,7 +3859,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(the_repository, &obj->oid))
return;
show_object(obj, name, data);
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..51c8c380d3 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(the_repository, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(the_repository, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/packfile.c b/packfile.c
index aea8e9f429..1867c2d844 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2292,14 +2292,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *repo, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(the_repository,
+ if (repo_has_promisor_remote(repo)) {
+ for_each_packed_object(repo,
add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
diff --git a/packfile.h b/packfile.h
index ec4aff63b4..afec4bbd74 100644
--- a/packfile.h
+++ b/packfile.h
@@ -210,7 +210,7 @@ int has_pack_index(struct repository *repo, const unsigned char *sha1);
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *repo, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/revision.c b/revision.c
index d7913d7608..df1037dcaa 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,7 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects && is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +432,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1211,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 15/20] object-store: pass down repository to `each_packed_object_fn`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (13 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 14/20] packfile: pass down repository to `is_promisor_object` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:57 ` [PATCH 16/20] packfile: pass down repository to `open_pack_index` Karthik Nayak
` (14 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The `each_packed_object_fn` typedef defines the type of the function
called for each packed object. Some of the implementations require the
repository state, so let's extend the function type to also take a
repository argument and modify all implementations to receive it, using
it instead of global state where needed.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 6 ++++--
builtin/fsck.c | 6 ++++--
builtin/pack-objects.c | 6 ++++--
builtin/repack.c | 5 +++--
commit-graph.c | 3 ++-
object-store-ll.h | 3 ++-
packfile.c | 9 +++++----
reachable.c | 5 +++--
revision.c | 3 ++-
9 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index f6afe67bef..e5d774b097 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -600,7 +600,8 @@ static int collect_loose_object(const struct object_id *oid,
return 0;
}
-static int collect_packed_object(const struct object_id *oid,
+static int collect_packed_object(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED,
void *data)
@@ -631,7 +632,8 @@ static int batch_unordered_loose(const struct object_id *oid,
return batch_unordered_object(oid, NULL, 0, data);
}
-static int batch_unordered_packed(const struct object_id *oid,
+static int batch_unordered_packed(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 9c4e0622b5..0e4b7ec3af 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -250,7 +250,8 @@ static int mark_loose_unreachable_referents(const struct object_id *oid,
return 0;
}
-static int mark_packed_unreachable_referents(const struct object_id *oid,
+static int mark_packed_unreachable_referents(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED,
void *data UNUSED)
@@ -861,7 +862,8 @@ static int mark_loose_for_connectivity(const struct object_id *oid,
return 0;
}
-static int mark_packed_for_connectivity(const struct object_id *oid,
+static int mark_packed_for_connectivity(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED,
void *data UNUSED)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 16e7f5d4ec..bfe0197d12 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3361,7 +3361,8 @@ static int git_pack_config(const char *k, const char *v,
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;
-static int add_object_entry_from_pack(const struct object_id *oid,
+static int add_object_entry_from_pack(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *p,
uint32_t pos,
void *_data)
@@ -3901,7 +3902,8 @@ static void show_edge(struct commit *commit)
add_preferred_base(&commit->object.oid);
}
-static int add_object_in_unpacked_pack(const struct object_id *oid,
+static int add_object_in_unpacked_pack(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data UNUSED)
diff --git a/builtin/repack.c b/builtin/repack.c
index 96a4fa234b..de03a3ecfc 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -321,7 +321,8 @@ static void prepare_pack_objects(struct child_process *cmd,
* Write oid to the given struct child_process's stdin, starting it first if
* necessary.
*/
-static int write_oid(const struct object_id *oid,
+static int write_oid(struct repository *repo,
+ const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED, void *data)
{
@@ -332,7 +333,7 @@ static int write_oid(const struct object_id *oid,
die(_("could not start pack-objects to repack promisor objects"));
}
- if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
+ if (write_in_full(cmd->in, oid_to_hex(oid), repo->hash_algo->hexsz) < 0 ||
write_in_full(cmd->in, "\n", 1) < 0)
die(_("failed to feed promisor objects to pack-objects"));
return 0;
diff --git a/commit-graph.c b/commit-graph.c
index 8c72c3ac10..96d55f8885 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1487,7 +1487,8 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
-static int add_packed_commits(const struct object_id *oid,
+static int add_packed_commits(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
diff --git a/object-store-ll.h b/object-store-ll.h
index 710130cd06..a9904687d6 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -538,7 +538,8 @@ int for_each_loose_object(each_loose_object_fn, void *,
* Each pack is visited in an unspecified order. By default, objects within a
* pack are visited in pack-idx order (i.e., sorted by oid).
*/
-typedef int each_packed_object_fn(const struct object_id *oid,
+typedef int each_packed_object_fn(struct repository *repo,
+ const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data);
diff --git a/packfile.c b/packfile.c
index 1867c2d844..831d2c2c74 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2199,7 +2199,7 @@ int for_each_object_in_pack(struct repository *repo,
return error("unable to get sha1 of object %u in %s",
index_pos, p->pack_name);
- r = cb(&oid, p, index_pos, data);
+ r = cb(repo, &oid, p, index_pos, data);
if (r)
break;
}
@@ -2237,7 +2237,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
return r ? r : pack_errors;
}
-static int add_promisor_object(const struct object_id *oid,
+static int add_promisor_object(struct repository *repo,
+ const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED,
void *set_)
@@ -2246,12 +2247,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(repo, oid);
}
if (!obj)
diff --git a/reachable.c b/reachable.c
index ecf7ccf504..f8dc2731d6 100644
--- a/reachable.c
+++ b/reachable.c
@@ -274,7 +274,8 @@ static int add_recent_loose(const struct object_id *oid,
return 0;
}
-static int add_recent_packed(const struct object_id *oid,
+static int add_recent_packed(struct repository *repo,
+ const struct object_id *oid,
struct packed_git *p,
uint32_t pos,
void *data)
@@ -285,7 +286,7 @@ static int add_recent_packed(const struct object_id *oid,
if (!want_recent_object(data, oid))
return 0;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(repo, oid);
if (obj && obj->flags & SEEN)
return 0;
diff --git a/revision.c b/revision.c
index df1037dcaa..97f5e59258 100644
--- a/revision.c
+++ b/revision.c
@@ -3603,7 +3603,8 @@ void reset_revision_walk(void)
clear_object_flags(SEEN | ADDED | SHOWN | TOPO_WALK_EXPLORED | TOPO_WALK_INDEGREE);
}
-static int mark_uninteresting(const struct object_id *oid,
+static int mark_uninteresting(struct repository *repo UNUSED,
+ const struct object_id *oid,
struct packed_git *pack UNUSED,
uint32_t pos UNUSED,
void *cb)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 16/20] packfile: pass down repository to `open_pack_index`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (14 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 15/20] object-store: pass down repository to `each_packed_object_fn` Karthik Nayak
@ 2024-10-21 9:57 ` Karthik Nayak
2024-10-21 9:58 ` [PATCH 17/20] packfile: stop using 'the_hash_algo' Karthik Nayak
` (13 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:57 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `open_pack_index` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers. Let's remove its usage from this function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/index-pack.c | 2 +-
builtin/pack-objects.c | 6 +++---
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
commit-graph.c | 2 +-
midx-write.c | 10 +++++-----
midx.c | 2 +-
object-name.c | 4 ++--
pack-check.c | 2 +-
pack-mtimes.c | 4 ++--
pack-mtimes.h | 3 ++-
pack-revindex.c | 4 ++--
packfile.c | 26 ++++++++++++++------------
packfile.h | 5 +++--
reachable.c | 2 +-
t/helper/test-pack-mtimes.c | 2 +-
18 files changed, 43 insertions(+), 39 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 1e89148ed7..b122b51fe1 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -131,7 +131,7 @@ int cmd_count_objects(int argc,
for (p = get_all_packs(the_repository); p; p = p->next) {
if (!p->pack_local)
continue;
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
continue;
packed += p->num_objects;
size_pack += p->pack_size + p->index_size;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 0e4b7ec3af..86006e5bbc 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -988,7 +988,7 @@ int cmd_fsck(int argc,
if (show_progress) {
for (p = get_all_packs(the_repository); p;
p = p->next) {
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
continue;
total += p->num_objects;
}
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..1898dc37a6 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1656,7 +1656,7 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
die(_("Cannot open existing pack idx file for '%s'"), pack_name);
/* Read the attributes from the existing idx file */
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index bfe0197d12..ffbd48c60c 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3902,7 +3902,7 @@ static void show_edge(struct commit *commit)
add_preferred_base(&commit->object.oid);
}
-static int add_object_in_unpacked_pack(struct repository *repo UNUSED,
+static int add_object_in_unpacked_pack(struct repository *repo,
const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
@@ -3913,7 +3913,7 @@ static int add_object_in_unpacked_pack(struct repository *repo UNUSED,
time_t mtime;
if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
+ if (load_pack_mtimes(repo, pack) < 0)
die(_("could not load cruft pack .mtimes"));
mtime = nth_packed_mtime(pack, pos);
} else {
@@ -4035,7 +4035,7 @@ static void loosen_unused_packed_objects(void)
if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
continue;
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
die(_("cannot open pack index"));
for (i = 0; i < p->num_objects; i++) {
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 60f806e672..bc3c3e9c4e 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -545,7 +545,7 @@ static struct pack_list * add_pack(struct packed_git *p)
l.pack = p;
llist_init(&l.remaining_objects);
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
return NULL;
base = p->index_data;
diff --git a/builtin/repack.c b/builtin/repack.c
index de03a3ecfc..5f317ba224 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -461,7 +461,7 @@ struct pack_geometry {
static uint32_t geometry_pack_weight(struct packed_git *p)
{
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
die(_("cannot open index for %s"), p->pack_name);
return p->num_objects;
}
diff --git a/commit-graph.c b/commit-graph.c
index 96d55f8885..35e4e9d99d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1920,7 +1920,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
}
- if (open_pack_index(p)) {
+ if (open_pack_index(the_repository, p)) {
ret = error(_("error opening index for %s"), packname.buf);
goto cleanup;
}
diff --git a/midx-write.c b/midx-write.c
index 4696b8326c..2b7c50d25c 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -161,7 +161,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
}
- if (open_pack_index(p)) {
+ if (open_pack_index(the_repository, p)) {
warning(_("failed to open pack-index '%s'"),
full_path);
close_pack(p);
@@ -301,8 +301,8 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
uint32_t cur_object;
if (cur_fanout)
- start = get_pack_fanout(pack, cur_fanout - 1);
- end = get_pack_fanout(pack, cur_fanout);
+ start = get_pack_fanout(the_repository, pack, cur_fanout - 1);
+ end = get_pack_fanout(the_repository, pack, cur_fanout);
for (cur_object = start; cur_object < end; cur_object++) {
midx_fanout_grow(fanout, fanout->nr + 1);
@@ -950,7 +950,7 @@ static int fill_packs_from_midx(struct write_midx_context *ctx,
return 1;
}
- if (open_pack_index(m->packs[i]))
+ if (open_pack_index(the_repository, m->packs[i]))
die(_("could not open index for %s"),
m->packs[i]->pack_name);
}
@@ -1612,7 +1612,7 @@ static int want_included_pack(struct repository *r,
return 0;
if (p->is_cruft)
return 0;
- if (open_pack_index(p) || !p->num_objects)
+ if (open_pack_index(r, p) || !p->num_objects)
return 0;
return 1;
}
diff --git a/midx.c b/midx.c
index c76df95d6d..edbd2008e3 100644
--- a/midx.c
+++ b/midx.c
@@ -966,7 +966,7 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag
continue;
}
- if (open_pack_index(e.p)) {
+ if (open_pack_index(the_repository, e.p)) {
midx_report(_("failed to load pack-index for packfile %s"),
e.p->pack_name);
break;
diff --git a/object-name.c b/object-name.c
index 43023884ef..368242133a 100644
--- a/object-name.c
+++ b/object-name.c
@@ -175,7 +175,7 @@ static void unique_in_pack(struct packed_git *p,
if (p->multi_pack_index)
return;
- if (open_pack_index(p) || !p->num_objects)
+ if (open_pack_index(ds->repo, p) || !p->num_objects)
return;
num = p->num_objects;
@@ -761,7 +761,7 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
if (p->multi_pack_index)
return;
- if (open_pack_index(p) || !p->num_objects)
+ if (open_pack_index(mad->repo, p) || !p->num_objects)
return;
num = p->num_objects;
diff --git a/pack-check.c b/pack-check.c
index a5551809c1..d93cf3f224 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -176,7 +176,7 @@ int verify_pack_index(struct packed_git *p)
{
int err = 0;
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
return error("packfile %s index not opened", p->pack_name);
/* Verify SHA1 sum of the index file */
diff --git a/pack-mtimes.c b/pack-mtimes.c
index cdf30b8d2b..a9fa8d55de 100644
--- a/pack-mtimes.c
+++ b/pack-mtimes.c
@@ -97,7 +97,7 @@ static int load_pack_mtimes_file(char *mtimes_file,
return ret;
}
-int load_pack_mtimes(struct packed_git *p)
+int load_pack_mtimes(struct repository *repo, struct packed_git *p)
{
char *mtimes_name = NULL;
int ret = 0;
@@ -107,7 +107,7 @@ int load_pack_mtimes(struct packed_git *p)
if (p->mtimes_map)
return ret; /* already loaded */
- ret = open_pack_index(p);
+ ret = open_pack_index(repo, p);
if (ret < 0)
goto cleanup;
diff --git a/pack-mtimes.h b/pack-mtimes.h
index 107327cec0..22ab6a8481 100644
--- a/pack-mtimes.h
+++ b/pack-mtimes.h
@@ -5,12 +5,13 @@
#define MTIMES_VERSION 1
struct packed_git;
+struct repository;
/*
* Loads the .mtimes file corresponding to "p", if any, returning zero
* on success.
*/
-int load_pack_mtimes(struct packed_git *p);
+int load_pack_mtimes(struct repository *repo, struct packed_git *p);
/* Returns the mtime associated with the object at position "pos" (in
* lexicographic/index order) in pack "p".
diff --git a/pack-revindex.c b/pack-revindex.c
index 22d3c23464..c14ef971da 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -178,7 +178,7 @@ static int create_pack_revindex_in_memory(struct packed_git *p)
if (git_env_bool(GIT_TEST_REV_INDEX_DIE_IN_MEMORY, 0))
die("dying as requested by '%s'",
GIT_TEST_REV_INDEX_DIE_IN_MEMORY);
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
return -1;
create_pack_revindex(p);
return 0;
@@ -274,7 +274,7 @@ int load_pack_revindex_from_disk(struct packed_git *p)
{
char *revindex_name;
int ret;
- if (open_pack_index(p))
+ if (open_pack_index(the_repository, p))
return -1;
revindex_name = pack_revindex_filename(p);
diff --git a/packfile.c b/packfile.c
index 831d2c2c74..b0147231cb 100644
--- a/packfile.c
+++ b/packfile.c
@@ -84,13 +84,14 @@ void pack_report(void)
* consistency checks, then record its information to p. Return 0 on
* success.
*/
-static int check_packed_git_idx(const char *path, struct packed_git *p)
+static int check_packed_git_idx(struct repository *repo, const char *path,
+ struct packed_git *p)
{
void *idx_map;
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -194,7 +195,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
return 0;
}
-int open_pack_index(struct packed_git *p)
+int open_pack_index(struct repository *repo, struct packed_git *p)
{
char *idx_name;
size_t len;
@@ -206,17 +207,18 @@ int open_pack_index(struct packed_git *p)
if (!strip_suffix(p->pack_name, ".pack", &len))
BUG("pack_name does not end in .pack");
idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
- ret = check_packed_git_idx(idx_name, p);
+ ret = check_packed_git_idx(repo, idx_name, p);
free(idx_name);
return ret;
}
-uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
+uint32_t get_pack_fanout(struct repository *repo, struct packed_git *p,
+ uint32_t value)
{
const uint32_t *level1_ofs = p->index_data;
if (!level1_ofs) {
- if (open_pack_index(p))
+ if (open_pack_index(repo, p))
return 0;
level1_ofs = p->index_data;
}
@@ -246,7 +248,7 @@ struct packed_git *parse_pack_index(struct repository *repo,
memcpy(p->pack_name, path, alloc); /* includes NUL */
hashcpy(p->hash, sha1, repo->hash_algo);
- if (check_packed_git_idx(idx_path, p)) {
+ if (check_packed_git_idx(repo, idx_path, p)) {
free(p);
return NULL;
}
@@ -544,7 +546,7 @@ static int open_packed_git_1(struct repository *repo, struct packed_git *p)
ssize_t read_result;
const unsigned hashsz = the_hash_algo->rawsz;
- if (open_pack_index(p))
+ if (open_pack_index(repo, p))
return error("packfile %s index unavailable", p->pack_name);
if (!pack_max_fds) {
@@ -951,7 +953,7 @@ unsigned long repo_approximate_object_count(struct repository *r)
for (m = get_multi_pack_index(r); m; m = m->next)
count += m->num_objects;
for (p = r->objects->packed_git; p; p = p->next) {
- if (open_pack_index(p))
+ if (open_pack_index(r, p))
continue;
count += p->num_objects;
}
@@ -1923,7 +1925,7 @@ int nth_packed_object_id(struct repository *repo, struct object_id *oid,
const unsigned char *index = p->index_data;
const unsigned int hashsz = repo->hash_algo->rawsz;
if (!index) {
- if (open_pack_index(p))
+ if (open_pack_index(repo, p))
return -1;
index = p->index_data;
}
@@ -1983,7 +1985,7 @@ off_t find_pack_entry_one(struct repository *repo, const unsigned char *sha1,
uint32_t result;
if (!index) {
- if (open_pack_index(p))
+ if (open_pack_index(repo, p))
return 0;
}
@@ -2226,7 +2228,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
- if (open_pack_index(p)) {
+ if (open_pack_index(repo, p)) {
pack_errors = 1;
continue;
}
diff --git a/packfile.h b/packfile.h
index afec4bbd74..98ec3616b4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -97,7 +97,7 @@ void pack_report(void);
* mmap the index file for the specified packfile (if it is not
* already mmapped). Return 0 on success.
*/
-int open_pack_index(struct packed_git *);
+int open_pack_index(struct repository *repo, struct packed_git *);
/*
* munmap the index file for the specified packfile (if it is
@@ -107,7 +107,8 @@ void close_pack_index(struct packed_git *);
int close_pack_fd(struct packed_git *p);
-uint32_t get_pack_fanout(struct packed_git *p, uint32_t value);
+uint32_t get_pack_fanout(struct repository *repo, struct packed_git *p,
+ uint32_t value);
struct raw_object_store;
diff --git a/reachable.c b/reachable.c
index f8dc2731d6..833013e7a3 100644
--- a/reachable.c
+++ b/reachable.c
@@ -291,7 +291,7 @@ static int add_recent_packed(struct repository *repo,
if (obj && obj->flags & SEEN)
return 0;
if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
+ if (load_pack_mtimes(repo, p) < 0)
die(_("could not load cruft pack .mtimes"));
mtime = nth_packed_mtime(p, pos);
}
diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c
index ebd980b308..b1caeba097 100644
--- a/t/helper/test-pack-mtimes.c
+++ b/t/helper/test-pack-mtimes.c
@@ -11,7 +11,7 @@
static void dump_mtimes(struct packed_git *p)
{
uint32_t i;
- if (load_pack_mtimes(p) < 0)
+ if (load_pack_mtimes(the_repository, p) < 0)
die("could not load pack .mtimes");
for (i = 0; i < p->num_objects; i++) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 17/20] packfile: stop using 'the_hash_algo'
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (15 preceding siblings ...)
2024-10-21 9:57 ` [PATCH 16/20] packfile: pass down repository to `open_pack_index` Karthik Nayak
@ 2024-10-21 9:58 ` Karthik Nayak
2024-10-21 9:58 ` [PATCH 18/20] packfile: pass down repository to `nth_packed_object_offset` Karthik Nayak
` (12 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:58 UTC (permalink / raw)
To: karthik.188; +Cc: git
Make changes to functions in 'packfile.c' to stop using the global
'the_hash_algo'. This requires changing some of the functions to accept
a repository variable and modifying the layers above to pass it.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
object-name.c | 4 ++--
pack-bitmap.c | 2 +-
packfile.c | 29 ++++++++++++++++++-----------
packfile.h | 3 ++-
4 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/object-name.c b/object-name.c
index 368242133a..83a36915d6 100644
--- a/object-name.c
+++ b/object-name.c
@@ -179,7 +179,7 @@ static void unique_in_pack(struct packed_git *p,
return;
num = p->num_objects;
- bsearch_pack(&ds->bin_pfx, p, &first);
+ bsearch_pack(ds->repo, &ds->bin_pfx, p, &first);
/*
* At this point, "first" is the location of the lowest object
@@ -766,7 +766,7 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
num = p->num_objects;
mad_oid = mad->oid;
- match = bsearch_pack(mad_oid, p, &first);
+ match = bsearch_pack(mad->repo, mad_oid, p, &first);
/*
* first is now the position in the packfile where we would insert
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 97237acb24..da2fc45f3f 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -741,7 +741,7 @@ static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git,
if (bitmap_is_midx(bitmap_git))
found = bsearch_midx(oid, bitmap_git->midx, result);
else
- found = bsearch_pack(oid, bitmap_git->pack, result);
+ found = bsearch_pack(the_repository, oid, bitmap_git->pack, result);
return found;
}
diff --git a/packfile.c b/packfile.c
index b0147231cb..e06931154f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -30,7 +30,7 @@ char *odb_pack_name(struct repository *repo, struct strbuf *buf,
{
strbuf_reset(buf);
strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(repo),
- hash_to_hex(hash), ext);
+ hash_to_hex_algop(hash, repo->hash_algo), ext);
return buf->buf;
}
@@ -544,7 +544,7 @@ static int open_packed_git_1(struct repository *repo, struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = repo->hash_algo->rawsz;
if (open_pack_index(repo, p))
return error("packfile %s index unavailable", p->pack_name);
@@ -614,7 +614,8 @@ static int open_packed_git(struct repository *repo, struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *repo, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -624,7 +625,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + repo->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct repository *repo, struct packed_git *p,
@@ -640,16 +641,16 @@ unsigned char *use_pack(struct repository *repo, struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(repo, p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(repo, win, offset))
break;
}
if (!win) {
@@ -714,6 +715,7 @@ struct packed_git *add_packed_git(struct repository *repo, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -755,8 +757,12 @@ struct packed_git *add_packed_git(struct repository *repo, const char *path,
p->pack_local = local;
p->mtime = st.st_mtime;
if (path_len < repo->hash_algo->hexsz ||
- get_hash_hex(path + path_len - repo->hash_algo->hexsz, p->hash))
+ get_oid_hex_algop(path + path_len - repo->hash_algo->hexsz, &oid,
+ repo->hash_algo))
hashclr(p->hash, repo->hash_algo);
+ else
+ memcpy(p->hash, oid.hash, repo->hash_algo->rawsz);
+
return p;
}
@@ -1895,11 +1901,12 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
return data;
}
-int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)
+int bsearch_pack(struct repository *repo, const struct object_id *oid,
+ const struct packed_git *p, uint32_t *result)
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1990,7 +1997,7 @@ off_t find_pack_entry_one(struct repository *repo, const unsigned char *sha1,
}
hashcpy(oid.hash, sha1, repo->hash_algo);
- if (bsearch_pack(&oid, p, &result))
+ if (bsearch_pack(repo, &oid, p, &result))
return nth_packed_object_offset(p, result);
return 0;
}
diff --git a/packfile.h b/packfile.h
index 98ec3616b4..d145959480 100644
--- a/packfile.h
+++ b/packfile.h
@@ -145,7 +145,8 @@ void check_pack_index_ptr(const struct packed_git *p, const void *ptr);
*
* See 'bsearch_hash' for more information.
*/
-int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result);
+int bsearch_pack(struct repository *repo, const struct object_id *oid,
+ const struct packed_git *p, uint32_t *result);
/*
* Write the oid of the nth object within the specified packfile into the first
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 18/20] packfile: pass down repository to `nth_packed_object_offset`
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (16 preceding siblings ...)
2024-10-21 9:58 ` [PATCH 17/20] packfile: stop using 'the_hash_algo' Karthik Nayak
@ 2024-10-21 9:58 ` Karthik Nayak
2024-10-21 9:58 ` [PATCH 19/20] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (11 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:58 UTC (permalink / raw)
To: karthik.188; +Cc: git
The function `nth_packed_object_offset` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 4 ++--
builtin/pack-objects.c | 6 +++---
commit-graph.c | 2 +-
midx-write.c | 2 +-
pack-check.c | 2 +-
pack-revindex.c | 3 ++-
packfile.c | 8 +++++---
packfile.h | 4 +++-
reachable.c | 3 ++-
9 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index e5d774b097..8aab7481b6 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -632,14 +632,14 @@ static int batch_unordered_loose(const struct object_id *oid,
return batch_unordered_object(oid, NULL, 0, data);
}
-static int batch_unordered_packed(struct repository *repo UNUSED,
+static int batch_unordered_packed(struct repository *repo,
const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
return batch_unordered_object(oid, pack,
- nth_packed_object_offset(pack, pos),
+ nth_packed_object_offset(repo, pack, pos),
data);
}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ffbd48c60c..8ab51eab8d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3361,7 +3361,7 @@ static int git_pack_config(const char *k, const char *v,
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;
-static int add_object_entry_from_pack(struct repository *repo UNUSED,
+static int add_object_entry_from_pack(struct repository *repo,
const struct object_id *oid,
struct packed_git *p,
uint32_t pos,
@@ -3375,7 +3375,7 @@ static int add_object_entry_from_pack(struct repository *repo UNUSED,
if (have_duplicate_entry(oid, 0))
return 0;
- ofs = nth_packed_object_offset(p, pos);
+ ofs = nth_packed_object_offset(repo, p, pos);
if (!want_object_in_pack(oid, 0, &p, &ofs))
return 0;
@@ -3919,7 +3919,7 @@ static int add_object_in_unpacked_pack(struct repository *repo,
} else {
mtime = pack->mtime;
}
- offset = nth_packed_object_offset(pack, pos);
+ offset = nth_packed_object_offset(repo, pack, pos);
add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
NULL, mtime);
diff --git a/commit-graph.c b/commit-graph.c
index 35e4e9d99d..24aae68195 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1495,7 +1495,7 @@ static int add_packed_commits(struct repository *repo UNUSED,
{
struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
enum object_type type;
- off_t offset = nth_packed_object_offset(pack, pos);
+ off_t offset = nth_packed_object_offset(ctx->r, pack, pos);
struct object_info oi = OBJECT_INFO_INIT;
if (ctx->progress)
diff --git a/midx-write.c b/midx-write.c
index 2b7c50d25c..69aada253f 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -233,7 +233,7 @@ static void fill_pack_entry(uint32_t pack_int_id,
entry->pack_int_id = pack_int_id;
entry->pack_mtime = p->mtime;
- entry->offset = nth_packed_object_offset(p, cur_object);
+ entry->offset = nth_packed_object_offset(the_repository, p, cur_object);
entry->preferred = !!preferred;
}
diff --git a/pack-check.c b/pack-check.c
index d93cf3f224..01562267a4 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -98,7 +98,7 @@ static int verify_packfile(struct repository *r,
entries[nr_objects].offset = pack_sig_ofs;
/* first sort entries by pack offset, since unpacking them is more efficient that way */
for (i = 0; i < nr_objects; i++) {
- entries[i].offset = nth_packed_object_offset(p, i);
+ entries[i].offset = nth_packed_object_offset(r, p, i);
entries[i].nr = i;
}
QSORT(entries, nr_objects, compare_entries);
diff --git a/pack-revindex.c b/pack-revindex.c
index c14ef971da..03d8c39c94 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -466,7 +466,8 @@ off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
else if (pos == p->num_objects)
return p->pack_size - the_hash_algo->rawsz;
else
- return nth_packed_object_offset(p, pack_pos_to_index(p, pos));
+ return nth_packed_object_offset(the_repository, p,
+ pack_pos_to_index(p, pos));
}
uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos)
diff --git a/packfile.c b/packfile.c
index e06931154f..1415df38e9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1964,10 +1964,12 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
p->pack_name);
}
-off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
+off_t nth_packed_object_offset(struct repository *repo,
+ const struct packed_git *p,
+ uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -1998,7 +2000,7 @@ off_t find_pack_entry_one(struct repository *repo, const unsigned char *sha1,
hashcpy(oid.hash, sha1, repo->hash_algo);
if (bsearch_pack(repo, &oid, p, &result))
- return nth_packed_object_offset(p, result);
+ return nth_packed_object_offset(repo, p, result);
return 0;
}
diff --git a/packfile.h b/packfile.h
index d145959480..9184560f0e 100644
--- a/packfile.h
+++ b/packfile.h
@@ -160,7 +160,9 @@ int nth_packed_object_id(struct repository *repo, struct object_id *,
* Return the offset of the nth object within the specified packfile.
* The index must already be opened.
*/
-off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
+off_t nth_packed_object_offset(struct repository *repo,
+ const struct packed_git *,
+ uint32_t n);
/*
* If the object named sha1 is present in the specified packfile,
diff --git a/reachable.c b/reachable.c
index 833013e7a3..e347e2d6ca 100644
--- a/reachable.c
+++ b/reachable.c
@@ -295,7 +295,8 @@ static int add_recent_packed(struct repository *repo,
die(_("could not load cruft pack .mtimes"));
mtime = nth_packed_mtime(p, pos);
}
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
+ add_recent_object(oid, p, nth_packed_object_offset(repo, p, pos), mtime,
+ data);
return 0;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 19/20] config: make `delta_base_cache_limit` a non-global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (17 preceding siblings ...)
2024-10-21 9:58 ` [PATCH 18/20] packfile: pass down repository to `nth_packed_object_offset` Karthik Nayak
@ 2024-10-21 9:58 ` Karthik Nayak
2024-10-21 9:58 ` [PATCH 20/20] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (10 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:58 UTC (permalink / raw)
To: karthik.188; +Cc: git
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 5 ++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..9a10eb58bc 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ unsigned long delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -205,6 +207,7 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
@@ -416,7 +419,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 1898dc37a6..f1edf7068d 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 1415df38e9..c9812eedad 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *repo, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1494,7 +1496,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1881,7 +1888,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH 20/20] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (18 preceding siblings ...)
2024-10-21 9:58 ` [PATCH 19/20] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-10-21 9:58 ` Karthik Nayak
2024-10-21 21:03 ` [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Taylor Blau
` (9 subsequent siblings)
29 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-21 9:58 UTC (permalink / raw)
To: karthik.188; +Cc: git
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used in
this file, let's change them from being global config variables to
local variables for the subsystem.
We do this by introducing a new local `packfile_config` struct in
`packfile.c` and also adding the required function to parse the said
config. We then use this within `packfile.c` to obtain the variables.
With this, we rid `packfile.c` of all global variable usage, which
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 +--
config.c | 17 -------------
environment.c | 2 --
packfile.c | 59 +++++++++++++++++++++++++++++++++++++------
packfile.h | 2 +-
5 files changed, 54 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index a6743db85c..e8c814a07e 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3540,7 +3540,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3661,7 +3661,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index c9812eedad..00a39036c9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -27,6 +26,17 @@
#include "config.h"
#include "pack-objects.h"
+struct packfile_config {
+ unsigned long packed_git_window_size;
+ unsigned long packed_git_limit;
+};
+
+#define PACKFILE_CONFIG_INIT \
+{ \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
+}
+
char *odb_pack_name(struct repository *repo, struct strbuf *buf,
const unsigned char *hash, const char *ext)
{
@@ -60,15 +70,44 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+static int packfile_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
+{
+ struct packfile_config *config = cb;
+
+ if (!strcmp(var, "core.packedgitwindowsize")) {
+ int pgsz_x2 = getpagesize() * 2;
+ config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
+
+ /* This value must be multiple of (pagesize * 2) */
+ config->packed_git_window_size /= pgsz_x2;
+ if (config->packed_git_window_size < 1)
+ config->packed_git_window_size = 1;
+ config->packed_git_window_size *= pgsz_x2;
+ return 0;
+ }
+
+ if (!strcmp(var, "core.packedgitlimit")) {
+ config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
+ return 0;
+ }
+
+ return git_default_config(var, value, ctx, cb);
+}
+
+
+void pack_report(struct repository *repo)
{
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ repo_config(repo, packfile_config, &config);
+
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(config.packed_git_window_size),
+ sz_fmt(config.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -656,20 +695,24 @@ unsigned char *use_pack(struct repository *repo, struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ size_t window_align;
off_t len;
+ repo_config(repo, packfile_config, &config);
+ window_align = config.packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(repo, p))
die("packfile %s cannot be accessed", p->pack_name);
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > config.packed_git_window_size)
+ len = config.packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+ while (config.packed_git_limit < pack_mapped
&& unuse_one_window(repo, p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index 9184560f0e..1894d7db93 100644
--- a/packfile.h
+++ b/packfile.h
@@ -91,7 +91,7 @@ struct packed_git *find_sha1_pack(struct repository *repo,
const unsigned char *sha1,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH 00/20] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (19 preceding siblings ...)
2024-10-21 9:58 ` [PATCH 20/20] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-10-21 21:03 ` Taylor Blau
2024-10-27 21:23 ` karthik nayak
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
` (8 subsequent siblings)
29 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-21 21:03 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, Patrick Steinhardt
On Mon, Oct 21, 2024 at 11:57:43AM +0200, Karthik Nayak wrote:
> The `packfile.c` file uses the global variable 'the_repository' extensively
> throughout the code. Let's remove all usecases of this, by modifying the
> required functions to accept a 'struct repository' instead. This is to clean up
> usage of global state.
>
> The first 18 patches are mostly passing a `struct repository` to each of the
> functions within `packfile.c` from other files. The last two patches move some
> global config variables and make them local. I'm not too well versed with this
> section of the code, so would be nice to get some eyes here.
I agree with the goal of this series, but I worry that as written it
will be quite disruptive to other topics on the list.
The standard way to avoid this disruption is to, for e.g. the first
change, do the following:
- Introduce a new function repo_odb_pack_name() that takes in a
'struct repository *', and rewrite odb_pack_name() in terms of it
(passing 'the_repository' in as the argument).
- Write a Coccinelle rule to replace all calls to odb_pack_name()
with calls to repo_odb_pack_name().
- Submit those patches without adjusting any non-obvious callers or
ones that are not contained to a single compilation unit that you
are already touching.
- Wait until a new development cycle has begun, run spatch on the new
rule to replace all other calls. Then optionally rename
repo_odb_pack_name() to odb_pack_name().
I think Patrick (CC'd) has done one of these transitions recently, so
I'll defer to him in case I got any of the details wrong.
In the meantime, I'm going to hold this one out of seen as it may be
disruptive in the current state.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 00/20] packfile: avoid using the 'the_repository' global variable
2024-10-21 21:03 ` [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Taylor Blau
@ 2024-10-27 21:23 ` karthik nayak
2024-10-27 23:54 ` Taylor Blau
2024-10-28 5:31 ` Jeff King
0 siblings, 2 replies; 184+ messages in thread
From: karthik nayak @ 2024-10-27 21:23 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Patrick Steinhardt
[-- Attachment #1: Type: text/plain, Size: 2750 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Oct 21, 2024 at 11:57:43AM +0200, Karthik Nayak wrote:
>> The `packfile.c` file uses the global variable 'the_repository' extensively
>> throughout the code. Let's remove all usecases of this, by modifying the
>> required functions to accept a 'struct repository' instead. This is to clean up
>> usage of global state.
>>
>> The first 18 patches are mostly passing a `struct repository` to each of the
>> functions within `packfile.c` from other files. The last two patches move some
>> global config variables and make them local. I'm not too well versed with this
>> section of the code, so would be nice to get some eyes here.
>
> I agree with the goal of this series, but I worry that as written it
> will be quite disruptive to other topics on the list.
>
I agree, that as it currently sits, this is very disruptive.
> The standard way to avoid this disruption is to, for e.g. the first
> change, do the following:
>
> - Introduce a new function repo_odb_pack_name() that takes in a
> 'struct repository *', and rewrite odb_pack_name() in terms of it
> (passing 'the_repository' in as the argument).
>
> - Write a Coccinelle rule to replace all calls to odb_pack_name()
> with calls to repo_odb_pack_name().
>
> - Submit those patches without adjusting any non-obvious callers or
> ones that are not contained to a single compilation unit that you
> are already touching.
>
> - Wait until a new development cycle has begun, run spatch on the new
> rule to replace all other calls. Then optionally rename
> repo_odb_pack_name() to odb_pack_name().
>
> I think Patrick (CC'd) has done one of these transitions recently, so
> I'll defer to him in case I got any of the details wrong.
>
> In the meantime, I'm going to hold this one out of seen as it may be
> disruptive in the current state.
>
> Thanks,
> Taylor
While thinking about this over the last few days and also getting some
advice from Patrick, I realized that we don't need to be this disruptive:
we can simply add a 'repository' variable to the already existing
'packed_git' struct. This allows us to leverage this information more
easily, since most of the functions already have access to the
'packed_git' struct.
This, plus the series by Jeff, 'jk/dumb-http-finalize', which also removes
some existing functions, reduces the impact to only 3 functions being
modified.
I think with such low impact, it might make more sense to not go with
the Coccinelle approach, since it is a lot simpler without it.
I'll post a new version tomorrow showcasing this approach, but I'll
leave the final decision to you whether it is still disruptive, and if
the approach you mentioned would be better.
Thanks
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 00/20] packfile: avoid using the 'the_repository' global variable
2024-10-27 21:23 ` karthik nayak
@ 2024-10-27 23:54 ` Taylor Blau
2024-10-28 5:31 ` Jeff King
1 sibling, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-27 23:54 UTC (permalink / raw)
To: karthik nayak; +Cc: git, Patrick Steinhardt
On Sun, Oct 27, 2024 at 05:23:24PM -0400, karthik nayak wrote:
> While thinking about this over the last few days and also getting some
> advice from Patrick, I realized that we don't need to be this disruptive
> by simply adding the 'repository' variable to the already existing
> 'packed_git' struct. This allows us to leverage this information more
> easily, since most of the functions already have access to the
> 'packed_git' struct.
Great idea!
> This, plus the series by Jeff 'jk/dumb-http-finalize' which also removes
> some existing functions. We reduce the impact to only 3 functions being
> modified.
>
> I think with such low impact, it might make more sense to not go with
> the Coccinelle approach, since it is a lot simpler without it.
>
> I'll post a new version tomorrow showcasing this approach, but I'll
> leave the final decision to you whether it is still disruptive, and if
> the approach you mentioned would be better.
I'll have to see the end result to know for sure, but it sounds like
this would be a good way to move it forward without being too
disruptive.
I wonder how it interacts with alternates, though...
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 00/20] packfile: avoid using the 'the_repository' global variable
2024-10-27 21:23 ` karthik nayak
2024-10-27 23:54 ` Taylor Blau
@ 2024-10-28 5:31 ` Jeff King
2024-10-28 13:36 ` karthik nayak
1 sibling, 1 reply; 184+ messages in thread
From: Jeff King @ 2024-10-28 5:31 UTC (permalink / raw)
To: karthik nayak; +Cc: Taylor Blau, git, Patrick Steinhardt
On Sun, Oct 27, 2024 at 05:23:24PM -0400, karthik nayak wrote:
> While thinking about this over the last few days and also getting some
> advice from Patrick, I realized that we don't need to be this disruptive
> by simply adding the 'repository' variable to the already existing
> 'packed_git' struct. This allows us to leverage this information more
> easily, since most of the functions already have access to the
> 'packed_git' struct.
>
> This, plus the series by Jeff 'jk/dumb-http-finalize' which also removes
> some existing functions. We reduce the impact to only 3 functions being
> modified.
Yeah, I noticed while working on that topic that we were dropping some
uses of the_repository. And FWIW I had the same notion, that packed_git
should perhaps refer to the repository struct in which it resides.
As Taylor noted this is a tiny bit weird with respect to alternates,
which could exist in another repository (but don't have to! It could be
a bare objects/ directory). But I think from the perspective of a
particular process, we only have one repository struct that covers all
of its alternates for the duration of this process. So it would be OK in
practice. You might be able to get away with just storing a hash_algo
pointer in packed_git, which would be less weird (and is enough for the
bits I looked at, but perhaps not in the more general case).
Looking at odb_pack_name(), it will still need to take a repository
struct, since we sometimes form it before having a packed_git. But for
most calls, I suspect you could have an alternate function that takes a
packed_git and uses both its "hash" member and the algorithm.
Anyway, just my two cents having worked in the area recently.
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 00/20] packfile: avoid using the 'the_repository' global variable
2024-10-28 5:31 ` Jeff King
@ 2024-10-28 13:36 ` karthik nayak
2024-10-28 15:21 ` Taylor Blau
0 siblings, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-10-28 13:36 UTC (permalink / raw)
To: Jeff King; +Cc: Taylor Blau, git, Patrick Steinhardt
[-- Attachment #1: Type: text/plain, Size: 2410 bytes --]
Jeff King <peff@peff.net> writes:
> On Sun, Oct 27, 2024 at 05:23:24PM -0400, karthik nayak wrote:
>
>> While thinking about this over the last few days and also getting some
>> advice from Patrick, I realized that we don't need to be this disruptive
>> by simply adding the 'repository' variable to the already existing
>> 'packed_git' struct. This allows us to leverage this information more
>> easily, since most of the functions already have access to the
>> 'packed_git' struct.
>>
>> This, plus the series by Jeff 'jk/dumb-http-finalize' which also removes
>> some existing functions. We reduce the impact to only 3 functions being
>> modified.
>
> Yeah, I noticed while working on that topic that we were dropping some
> uses of the_repository. And FWIW I had the same notion, that packed_git
> should perhaps refer to the repository struct in which it resides.
>
> As Taylor noted this is a tiny bit weird with respect to alternates,
> which could exist in another repository (but don't have to! It could be
> a bare objects/ directory). But I think from the perspective of a
> particular process, we only have one repository struct that covers all
> of its alternates for the duration of this process. So it would be OK in
> practice. You might be able to get away with just storing a hash_algo
> pointer in packed_git, which would be less weird (and is enough for the
> bits I looked at, but perhaps not in the more general case).
>
This was my thought as well regarding alternates. Also it should be
noted that currently we're using the_repository anyways, so we will be
in the same state as before.
In a general case it seems more necessary to add the repo and not just
the hash_algo. Mostly because there are parts which require access to
the repository and also because some of my patches add config changes
which also require access to the repository.
> Looking at odb_pack_name(), it will still need to take a repository
> struct, since we sometimes form it before having a packed_git. But for
> most calls, I suspect you could have an alternate function that takes a
> packed_git and uses both its "hash" member and the algorithm.
>
I have four functions which would still need to take in a repository:
1. for_each_packed_object
2. has_object_pack
3. has_object_kept_pack
4. odb_pack_name
> Anyway, just my two cents having worked in the area recently.
>
> -Peff
Thanks for your input!
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH 00/20] packfile: avoid using the 'the_repository' global variable
2024-10-28 13:36 ` karthik nayak
@ 2024-10-28 15:21 ` Taylor Blau
0 siblings, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 15:21 UTC (permalink / raw)
To: karthik nayak; +Cc: Jeff King, git, Patrick Steinhardt
On Mon, Oct 28, 2024 at 08:36:50AM -0500, karthik nayak wrote:
> Jeff King <peff@peff.net> writes:
>
> > On Sun, Oct 27, 2024 at 05:23:24PM -0400, karthik nayak wrote:
> >
> >> While thinking about this over the last few days and also getting some
> >> advice from Patrick, I realized that we don't need to be this disruptive
> >> by simply adding the 'repository' variable to the already existing
> >> 'packed_git' struct. This allows us to leverage this information more
> >> easily, since most of the functions already have access to the
> >> 'packed_git' struct.
> >>
> >> This, plus the series by Jeff 'jk/dumb-http-finalize' which also removes
> >> some existing functions. We reduce the impact to only 3 functions being
> >> modified.
> >
> > Yeah, I noticed while working on that topic that we were dropping some
> > uses of the_repository. And FWIW I had the same notion, that packed_git
> > should perhaps refer to the repository struct in which it resides.
> >
> > As Taylor noted this is a tiny bit weird with respect to alternates,
> > which could exist in another repository (but don't have to! It could be
> > a bare objects/ directory). But I think from the perspective of a
> > particular process, we only have one repository struct that covers all
> > of its alternates for the duration of this process. So it would be OK in
> > practice. You might be able to get away with just storing a hash_algo
> > pointer in packed_git, which would be less weird (and is enough for the
> > bits I looked at, but perhaps not in the more general case).
>
> This was my thought as well regarding alternates. Also it should be
> noted that currently we're using the_repository anyways, so we will be
> in the same state as before.
Makes sense. Thanks, both, for thinking through it together.
> In a general case it seems more necessary to add the repo and not just
> the hash_algo. Mostly because there are parts which require access to
> the repository and also because some of my patches add config changes
> which also require access to the repository.
I could believe that ;-). I see that you posted some patches lower down
in the thread, which I figure probably uncover some cases where we need
more than just a pointer to the_hash_algo.
But let's read on and see exactly what shakes out.
> > Looking at odb_pack_name(), it will still need to take a repository
> > struct, since we sometimes form it before having a packed_git. But for
> > most calls, I suspect you could have an alternate function that takes a
> > packed_git and uses both its "hash" member and the algorithm.
>
> I have four functions which would still need to take in a repository:
> 1. for_each_packed_object
> 2. has_object_pack
> 3. has_object_kept_pack
> 4. odb_pack_name
That matches my own thinking, but perhaps there are others that neither
of us are coming up with off the tops of our heads. I think that as long
as for_each_packed_object() continues to call prepare_packed_git (which
sets up all of our alternates) and we continue to consult the
packed_git_mru cache, we should be OK.
> > Anyway, just my two cents having worked in the area recently.
> >
> > -Peff
>
> Thanks for your input!
Indeed. Thanks, both.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 0/8] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (20 preceding siblings ...)
2024-10-21 21:03 ` [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Taylor Blau
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 13:43 ` [PATCH v2 1/8] packfile: add repository to struct `packed_git` Karthik Nayak
` (7 more replies)
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (7 subsequent siblings)
29 siblings, 8 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all use cases of this, by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`, we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the last two patches deal with global config values. These values are
localized.
This series is based on master: 6a11438f43 (The fifth batch, 2024-10-25),
with 'jk/dumb-http-finalize' merged in. I found no issues merging this with seen,
but since these patches cover a lot of files, there might be some conflicts.
Since this tackles the topic in a new way compared to v1, I've avoided adding a
range-diff with the previous version.
Changes in v2:
- This version reduces the blast radius by not passing around the repository
struct to each function and instead passing it through the packed_git struct. Thanks
to Patrick for the tip.
Karthik Nayak (8):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++--
builtin/gc.c | 5 +-
builtin/index-pack.c | 20 +++--
builtin/pack-objects.c | 11 ++-
builtin/pack-redundant.c | 4 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 -----
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 9 +-
pack-bitmap.c | 2 +-
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 185 ++++++++++++++++++++++++++-------------
packfile.h | 18 ++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
revision.c | 13 +--
tag.c | 2 +-
33 files changed, 226 insertions(+), 157 deletions(-)
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 1/8] packfile: add repository to struct `packed_git`
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:05 ` Taylor Blau
2024-10-28 13:43 ` [PATCH v2 2/8] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (6 subsequent siblings)
7 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callee to pass the repository object down the
levels.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..ffee7d3abd 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(the_repository, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 510332ab04..7e5be05207 100644
--- a/http.c
+++ b/http.c
@@ -2437,7 +2437,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..8b31072b09 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -4,6 +4,7 @@
#include "hashmap.h"
#include "object.h"
#include "list.h"
+#include "repository.h"
#include "thread-utils.h"
#include "oidset.h"
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packed file belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..45f300e5e1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *repo, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = repo;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *repo,
+ unsigned char *sha1, const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(repo, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *repo, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(repo, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..344da905c2 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *repo,
+ unsigned char *sha1, const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *repo, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 1/8] packfile: add repository to struct `packed_git`
2024-10-28 13:43 ` [PATCH v2 1/8] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-10-28 16:05 ` Taylor Blau
2024-10-29 11:46 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 16:05 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 28, 2024 at 02:43:39PM +0100, Karthik Nayak wrote:
> The struct `packed_git` holds information regarding a packed object
> file. Let's add the repository variable to this object, to represent the
> repository that this packfile belongs to. This helps remove dependency
> on the global `the_repository` object in `packfile.c` by simply using
> repository information now readily available in the struct.
Makes sense, good. I think it would be useful here to capture some of
the discussion from just before you sent this series to indicate why
it's OK to use the_repository even when we have alternates.
I think it is now quite obvious in retrospect, but let's do our future
selves a service by capturing it here, too ;-).
> ---
> 10 files changed, 30 insertions(+), 16 deletions(-)
Oh, good. I am glad to see that this new approach is already yielding
far less disruptive of a change.
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index 76d5c20f14..ffee7d3abd 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -765,6 +765,7 @@ static void start_packfile(void)
>
> p->pack_fd = pack_fd;
> p->do_not_close = 1;
> + p->repo = the_repository;
Makes sense. Here we are crafting the packfile by hand, so initializing
->repo directly makes sense here.
It would be nice if we could rewrite this in terms of
packfile.c:alloc_packed_git(), but that is a static function. Exposing
it as non-static is probably showing too much of the internals, so I
think leaving this as-is makes sense.
> diff --git a/commit-graph.c b/commit-graph.c
> index 5bd89c0acd..83dd69bfeb 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
> struct packed_git *p;
> strbuf_setlen(&packname, dirlen);
> strbuf_addstr(&packname, pack_indexes->items[i].string);
> - p = add_packed_git(packname.buf, packname.len, 1);
> + p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
I wondered if ctx->r was the right choice here or not, but it is, and it
is (currently) always equal to the value of the_repository, so it's a
moot point. Let's keep going...
> diff --git a/midx-write.c b/midx-write.c
> index b3a5f6c516..c57726ef94 100644
> --- a/midx-write.c
> +++ b/midx-write.c
> @@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
> return;
>
> ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
> - p = add_packed_git(full_path, full_path_len, 0);
> + p = add_packed_git(the_repository, full_path, full_path_len, 0);
Ugh. I thought we had already added a repository field to our auxiliary
write_midx_context struct, but we have not, so this change looks right
to me. Doing so (adding that new field) seems like it would be a good
piece of #leftoverbits.
> diff --git a/midx.c b/midx.c
> index e82d4f2e65..8edb75f51d 100644
> --- a/midx.c
> +++ b/midx.c
> @@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
> strhash(key.buf), key.buf,
> struct packed_git, packmap_ent);
> if (!p) {
> - p = add_packed_git(pack_name.buf, pack_name.len, m->local);
> + p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
OK, so here we're trusting the value of 'r' from the caller. That comes
from 64404a24cf (midx: pass a repository pointer, 2019-04-29), which is
doing the right thing. (As an aside, I thought that that change was from
when we added the --object-dir flag to 'git multi-pack-index', but the
change is in fact unrelated and has to do with adding installed packs to
the repository's MRU list).
> if (p) {
> install_packed_git(r, p);
> list_add_tail(&p->mru, &r->objects->packed_git_mru);
> diff --git a/object-store-ll.h b/object-store-ll.h
> index 53b8e693b1..8b31072b09 100644
> --- a/object-store-ll.h
> +++ b/object-store-ll.h
> @@ -4,6 +4,7 @@
> #include "hashmap.h"
> #include "object.h"
> #include "list.h"
> +#include "repository.h"
Hmm. Do we need to include all of repository.h here? I don't think we
do, because we never peek into any of the fields of that structure from
within this header. So I think you could do something like:
--- 8< ---
diff --git a/object-store-ll.h b/object-store-ll.h
index 6f9f4276e6..bcfae2e1bf 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -4,13 +4,13 @@
#include "hashmap.h"
#include "object.h"
#include "list.h"
-#include "repository.h"
#include "thread-utils.h"
#include "oidset.h"
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
--- >8 ---
instead of #include-ing the whole thing, which would be preferable.
> #include "thread-utils.h"
> #include "oidset.h"
>
> @@ -135,6 +136,10 @@ struct packed_git {
> */
> const uint32_t *mtimes_map;
> size_t mtimes_size;
> +
> + /* repo denotes the repository this packed file belongs to */
> + struct repository *repo;
Calling this 'repo' makes sense, but...
> diff --git a/packfile.c b/packfile.c
> index 9560f0a33c..45f300e5e1 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
> return ntohl(level1_ofs[value]);
> }
>
> -static struct packed_git *alloc_packed_git(int extra)
> +static struct packed_git *alloc_packed_git(struct repository *repo, int extra)
Here and elsewhere, I think our usual convention is to call a 'struct
repository *' (when it is a formal parameter of some function) just "r"
instead of "repo".
At least that's what my intuition told me, and a very rough grep says
that '*r' appears as a parameter 815 times, while '*repo' appears only
577 times. It's close, but I think that '*r' is preferred here since
it's fewer characters.
> {
> struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
> memset(p, 0, sizeof(*p));
Not at all the fault of this patch, but it feels like a bit of a
foot-gun to allocate a bounds-checked version of 'sizeof(*p)+extra',
while only zero'ing the first 'sizeof(*p)' bytes. I think in all cases
where it actually matters via add_packed_git() we fill out that extra
space anyway, but it might be nice cleanup to do something like:
struct packed_git *p;
size_t sz = sizeof(*p) + extra;
p = xcalloc(1, sz);
, or something. But that can be dealt with later and/or as #leftoverbits.
The rest is looking good, nicely done. Let's keep reading...
Thanks,
Taylor
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 1/8] packfile: add repository to struct `packed_git`
2024-10-28 16:05 ` Taylor Blau
@ 2024-10-29 11:46 ` karthik nayak
2024-10-29 17:27 ` Taylor Blau
0 siblings, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-10-29 11:46 UTC (permalink / raw)
To: Taylor Blau; +Cc: git
[-- Attachment #1: Type: text/plain, Size: 7428 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Oct 28, 2024 at 02:43:39PM +0100, Karthik Nayak wrote:
>> The struct `packed_git` holds information regarding a packed object
>> file. Let's add the repository variable to this object, to represent the
>> repository that this packfile belongs to. This helps remove dependency
>> on the global `the_repository` object in `packfile.c` by simply using
>> repository information now readily available in the struct.
>
> Makes sense, good. I think it would be useful here to capture some of
> the discussion from just before you sent this series to indicate why
> it's OK to use the_repository even when we have alternates.
>
> I think it is now quite obvious in retrospect, but let's do our future
> selves a service by capturing it here, too ;-).
>
Agree, will add it in the next version.
>> ---
>> 10 files changed, 30 insertions(+), 16 deletions(-)
>
> Oh, good. I am glad to see that this new approach is already yielding
> far less disruptive of a change.
>
>> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
>> index 76d5c20f14..ffee7d3abd 100644
>> --- a/builtin/fast-import.c
>> +++ b/builtin/fast-import.c
>> @@ -765,6 +765,7 @@ static void start_packfile(void)
>>
>> p->pack_fd = pack_fd;
>> p->do_not_close = 1;
>> + p->repo = the_repository;
>
> Makes sense. Here we are crafting the packfile by hand, so initializing
> ->repo directly makes sense here.
>
> It would be nice if we could rewrite this in terms of
> packfile.c:alloc_packed_git(), but that is a static function. Exposing
> it as non-static is probably showing too much of the internals, so I
> think leaving this as-is makes sense.
>
Yes, I did consider that too, but dropped it for the same reasons you
stated.
>> diff --git a/commit-graph.c b/commit-graph.c
>> index 5bd89c0acd..83dd69bfeb 100644
>> --- a/commit-graph.c
>> +++ b/commit-graph.c
>> @@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
>> struct packed_git *p;
>> strbuf_setlen(&packname, dirlen);
>> strbuf_addstr(&packname, pack_indexes->items[i].string);
>> - p = add_packed_git(packname.buf, packname.len, 1);
>> + p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
>
> I wondered if ctx->r was the right choice here or not, but it is, and it
> is (currently) always equal to the value of the_repository, so it's a
> moot point. Let's keep going...
>
>> diff --git a/midx-write.c b/midx-write.c
>> index b3a5f6c516..c57726ef94 100644
>> --- a/midx-write.c
>> +++ b/midx-write.c
>> @@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
>> return;
>>
>> ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
>> - p = add_packed_git(full_path, full_path_len, 0);
>> + p = add_packed_git(the_repository, full_path, full_path_len, 0);
>
> Ugh. I thought we had already added a repository field to our auxiliary
> write_midx_context struct, but we have not, so this change looks right
> to me. Doing so (adding that new field) seems like it would be a good
> piece of #leftoverbits.
>
>> diff --git a/midx.c b/midx.c
>> index e82d4f2e65..8edb75f51d 100644
>> --- a/midx.c
>> +++ b/midx.c
>> @@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
>> strhash(key.buf), key.buf,
>> struct packed_git, packmap_ent);
>> if (!p) {
>> - p = add_packed_git(pack_name.buf, pack_name.len, m->local);
>> + p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
>
> OK, so here we're trusting the value of 'r' from the caller. That comes
> from 64404a24cf (midx: pass a repository pointer, 2019-04-29), which is
> doing the right thing. (As an aside, I thought that that change was from
> when we added the --object-dir flag to 'git multi-pack-index', but the
> change is in fact unrelated and has to do with adding installed packs to
> the repository's MRU list).
>
>> if (p) {
>> install_packed_git(r, p);
>> list_add_tail(&p->mru, &r->objects->packed_git_mru);
>> diff --git a/object-store-ll.h b/object-store-ll.h
>> index 53b8e693b1..8b31072b09 100644
>> --- a/object-store-ll.h
>> +++ b/object-store-ll.h
>> @@ -4,6 +4,7 @@
>> #include "hashmap.h"
>> #include "object.h"
>> #include "list.h"
>> +#include "repository.h"
>
> Hmm. Do we need to include all of repository.h here? I don't think we
> do, because we never peek into any of the fields of that structure from
> within this header. So I think you could do something like:
>
> --- 8< ---
> diff --git a/object-store-ll.h b/object-store-ll.h
> index 6f9f4276e6..bcfae2e1bf 100644
> --- a/object-store-ll.h
> +++ b/object-store-ll.h
> @@ -4,13 +4,13 @@
> #include "hashmap.h"
> #include "object.h"
> #include "list.h"
> -#include "repository.h"
> #include "thread-utils.h"
> #include "oidset.h"
>
> struct oidmap;
> struct oidtree;
> struct strbuf;
> +struct repository;
>
> struct object_directory {
> struct object_directory *next;
> --- >8 ---
>
> instead of #include-ing the whole thing, which would be preferable.
>
This is much better, I will patch it in.
>> #include "thread-utils.h"
>> #include "oidset.h"
>>
>> @@ -135,6 +136,10 @@ struct packed_git {
>> */
>> const uint32_t *mtimes_map;
>> size_t mtimes_size;
>> +
>> + /* repo denotes the repository this packed file belongs to */
>> + struct repository *repo;
>
> Calling this 'repo' makes sense, but...
>
>> diff --git a/packfile.c b/packfile.c
>> index 9560f0a33c..45f300e5e1 100644
>> --- a/packfile.c
>> +++ b/packfile.c
>> @@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
>> return ntohl(level1_ofs[value]);
>> }
>>
>> -static struct packed_git *alloc_packed_git(int extra)
>> +static struct packed_git *alloc_packed_git(struct repository *repo, int extra)
>
> Here and elsewhere, I think our usual convention is to call a 'struct
> repository *' (when it is a formal parameter of some function) just "r"
> instead of "repo".
>
> At least that's what my intuition told me, and a very rough grep says
> that '*r' appears as a parameter 815 times, while '*repo' appears only
> 577 times. It's close, but I think that '*r' is preferred here since
> it's fewer characters.
>
I agree, by now you know I prefer readability over fewer characters, so
it is more of an intentional choice. But here, I think it can be '*r'
though, since it is sort of obvious what 'r' refers to in most cases.
I will change this in all commits in the next version.
>> {
>> struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
>> memset(p, 0, sizeof(*p));
>
> Not at all the fault of this patch, but it feels like a bit of a
> foot-gun to allocate a bounds-checked version of 'sizeof(*p)+extra',
> while only zero'ing the first 'sizeof(*p)' bytes. I think in all cases
> where it actually matters via add_packed_git() we fill out that extra
> space anyway, but it might be nice cleanup to do something like:
>
> struct packed_git *p;
> size_t sz = sizeof(*p) + extra;
>
> p = xcalloc(1, sz);
>
> , or something. But that can be dealt with later and/or as #leftoverbits.
>
Like you stated, there is a bunch of refactoring we could do here, I mostly
don't want to digress and create too much noise, so I will turn a blind
eye in some cases and leave it for later.
> The rest is looking good, nicely done. Let's keep reading...
>
> Thanks,
> Taylor
Thanks for the review.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v2 1/8] packfile: add repository to struct `packed_git`
2024-10-29 11:46 ` karthik nayak
@ 2024-10-29 17:27 ` Taylor Blau
0 siblings, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-29 17:27 UTC (permalink / raw)
To: karthik nayak; +Cc: git
On Tue, Oct 29, 2024 at 07:46:35AM -0400, karthik nayak wrote:
> >> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> >> index 76d5c20f14..ffee7d3abd 100644
> >> --- a/builtin/fast-import.c
> >> +++ b/builtin/fast-import.c
> >> @@ -765,6 +765,7 @@ static void start_packfile(void)
> >>
> >> p->pack_fd = pack_fd;
> >> p->do_not_close = 1;
> >> + p->repo = the_repository;
> >
> > Makes sense. Here we are crafting the packfile by hand, so initializing
> > ->repo directly makes sense here.
> >
> > It would be nice if we could rewrite this in terms of
> > packfile.c:alloc_packed_git(), but that is a static function. Exposing
> > it as non-static is probably showing too much of the internals, so I
> > think leaving this as-is makes sense.
>
> Yes, I did consider that too, but dropped it for the same reasons you
> stated.
Makes sense, and yeah, I think that was a reasonable choice here. I
probably would have done the same thing :-).
> > Hmm. Do we need to include all of repository.h here? I don't think we
> > do, because we never peek into any of the fields of that structure from
> > within this header. So I think you could do something like:
> >
> > --- 8< ---
> > diff --git a/object-store-ll.h b/object-store-ll.h
> > index 6f9f4276e6..bcfae2e1bf 100644
> > --- a/object-store-ll.h
> > +++ b/object-store-ll.h
> > @@ -4,13 +4,13 @@
> > #include "hashmap.h"
> > #include "object.h"
> > #include "list.h"
> > -#include "repository.h"
> > #include "thread-utils.h"
> > #include "oidset.h"
> >
> > struct oidmap;
> > struct oidtree;
> > struct strbuf;
> > +struct repository;
> >
> > struct object_directory {
> > struct object_directory *next;
> > --- >8 ---
> >
> > instead of #include-ing the whole thing, which would be preferable.
> >
>
> This is much better, I will patch it in.
Great, thanks.
> I agree, by now you know I prefer readability over fewer characters, so
> it is more of an intentional choice. But here, I think it can be '*r'
> though, since it is sort of obvious what 'r' refers to in most cases.
>
> I will change this in all commits in the next version.
;-).
> >> {
> >> struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
> >> memset(p, 0, sizeof(*p));
> >
> > Not at all the fault of this patch, but it feels like a bit of a
> > foot-gun to allocate a bounds-checked version of 'sizeof(*p)+extra',
> > while only zero'ing the first 'sizeof(*p)' bytes. I think in all cases
> > where it actually matters via add_packed_git() we fill out that extra
> > space anyway, but it might be nice cleanup to do something like:
> >
> > struct packed_git *p;
> > size_t sz = sizeof(*p) + extra;
> >
> > p = xcalloc(1, sz);
> >
> > , or something. But that can be dealt with later and/or as #leftoverbits.
> >
>
> Like you stated, there is a bunch of refactoring we could do here, I mostly
> don't want to digress and create too much noise, so I will turn a blind
> eye in some cases and leave it for later.
Yeah; to be clear I don't think that we have to address in detail all of
these potential refactorings as a prerequisite to merging this series. I
just wanted to mention them on the list to have some record of having
thought about it, and to avoid the risk that searching for #leftoverbits
returns no results ;-).
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 2/8] packfile: use `repository` from `packed_git` directly
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
2024-10-28 13:43 ` [PATCH v2 1/8] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:08 ` Taylor Blau
2024-10-28 13:43 ` [PATCH v2 3/8] packfile: pass `repository` to static function in the file Karthik Nayak
` (5 subsequent siblings)
7 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 45f300e5e1..9b353db331 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *repo,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *repo, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *repo, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < repo->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - repo->hash_algo->hexsz, &oid,
+ repo->hash_algo))
+ hashclr(p->hash, repo->hash_algo);
+ else
+ memcpy(p->hash, oid.hash, repo->hash_algo->rawsz);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 2/8] packfile: use `repository` from `packed_git` directly
2024-10-28 13:43 ` [PATCH v2 2/8] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-10-28 16:08 ` Taylor Blau
2024-10-29 11:48 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 16:08 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 28, 2024 at 02:43:40PM +0100, Karthik Nayak wrote:
> In the previous commit, we introduced the `repository` structure inside
> `packed_git`. This provides an alternative route instead of using the
> global `the_repository` variable. Let's modify `packfile.c` now to use
> this field wherever possible instead of relying on the global state.
> There are still a few instances of `the_repository` usage in the file,
> where there is no struct `packed_git` locally available, which will be
> fixed in the following commits.
>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> packfile.c | 50 +++++++++++++++++++++++++++-----------------------
> 1 file changed, 27 insertions(+), 23 deletions(-)
Very nice, and indeed much less disruptive than the RFC version of these
patches. All of the first few transformations look correct to me.
> @@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *repo, const char *path,
> p->pack_size = st.st_size;
> p->pack_local = local;
> p->mtime = st.st_mtime;
> - if (path_len < the_hash_algo->hexsz ||
> - get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
> - hashclr(p->hash, the_repository->hash_algo);
> + if (path_len < repo->hash_algo->hexsz ||
> + get_oid_hex_algop(path + path_len - repo->hash_algo->hexsz, &oid,
> + repo->hash_algo))
> + hashclr(p->hash, repo->hash_algo);
> + else
> + memcpy(p->hash, oid.hash, repo->hash_algo->rawsz);
This should be:
hashcpy(p->hash, oid.hash, repo->hash_algo);
instead of a bare memcpy().
Everything else is looking good.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v2 2/8] packfile: use `repository` from `packed_git` directly
2024-10-28 16:08 ` Taylor Blau
@ 2024-10-29 11:48 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-10-29 11:48 UTC (permalink / raw)
To: Taylor Blau; +Cc: git
[-- Attachment #1: Type: text/plain, Size: 1751 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Oct 28, 2024 at 02:43:40PM +0100, Karthik Nayak wrote:
>> In the previous commit, we introduced the `repository` structure inside
>> `packed_git`. This provides an alternative route instead of using the
>> global `the_repository` variable. Let's modify `packfile.c` now to use
>> this field wherever possible instead of relying on the global state.
>> There are still a few instances of `the_repository` usage in the file,
>> where there is no struct `packed_git` locally available, which will be
>> fixed in the following commits.
>>
>> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
>> ---
>> packfile.c | 50 +++++++++++++++++++++++++++-----------------------
>> 1 file changed, 27 insertions(+), 23 deletions(-)
>
> Very nice, and indeed much less disruptive than the RFC version of these
> patches. All of the first few transformations look correct to me.
>
>> @@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *repo, const char *path,
>> p->pack_size = st.st_size;
>> p->pack_local = local;
>> p->mtime = st.st_mtime;
>> - if (path_len < the_hash_algo->hexsz ||
>> - get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
>> - hashclr(p->hash, the_repository->hash_algo);
>> + if (path_len < repo->hash_algo->hexsz ||
>> + get_oid_hex_algop(path + path_len - repo->hash_algo->hexsz, &oid,
>> + repo->hash_algo))
>> + hashclr(p->hash, repo->hash_algo);
>> + else
>> + memcpy(p->hash, oid.hash, repo->hash_algo->rawsz);
>
> This should be:
>
> hashcpy(p->hash, oid.hash, repo->hash_algo);
>
> instead of a bare memcpy().
>
Indeed, didn't know of the function, will change.
> Everything else is looking good.
>
> Thanks,
> Taylor
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 3/8] packfile: pass `repository` to static function in the file
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
2024-10-28 13:43 ` [PATCH v2 1/8] packfile: add repository to struct `packed_git` Karthik Nayak
2024-10-28 13:43 ` [PATCH v2 2/8] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:12 ` Taylor Blau
2024-10-28 13:43 ` [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (4 subsequent siblings)
7 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
Some of the static functions in the `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 9b353db331..cc558f06cc 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *repo)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = repo->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *repo, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + repo->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 3/8] packfile: pass `repository` to static function in the file
2024-10-28 13:43 ` [PATCH v2 3/8] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-10-28 16:12 ` Taylor Blau
0 siblings, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 16:12 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 28, 2024 at 02:43:41PM +0100, Karthik Nayak wrote:
> Some of the static functions in the `packfile.c` access global
> variables, which can simply be avoided by passing the `repository`
> struct down to them. Let's do that.
>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> packfile.c | 15 ++++++++-------
> 1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/packfile.c b/packfile.c
> index 9b353db331..cc558f06cc 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
> *accept_windows_inuse = has_windows_inuse;
> }
>
> -static int close_one_pack(void)
> +static int close_one_pack(struct repository *repo)
Same note on naming this parameter as 'struct repository *r' instead of
"repo".
> {
> struct packed_git *p, *lru_p = NULL;
> struct pack_window *mru_w = NULL;
> int accept_windows_inuse = 1;
>
> - for (p = the_repository->objects->packed_git; p; p = p->next) {
> + for (p = repo->objects->packed_git; p; p = p->next) {
> if (p->pack_fd == -1)
> continue;
> find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
> @@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
> pack_max_fds = 1;
> }
>
> - while (pack_max_fds <= pack_open_fds && close_one_pack())
> + while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
Makes sense, as does the remainder of the patch. Looking good.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name`
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
` (2 preceding siblings ...)
2024-10-28 13:43 ` [PATCH v2 3/8] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:14 ` Taylor Blau
2024-10-29 5:50 ` Jeff King
2024-10-28 13:43 ` [PATCH v2 5/8] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (3 subsequent siblings)
7 siblings, 2 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 4 ++--
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index ffee7d3abd..f4892d7f37 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(the_repository, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(the_repository, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(the_repository, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..7d6c47ffd9 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -589,7 +589,7 @@ static void load_all(void)
}
}
-int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) {
+int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo) {
int i; int i_still_use_this = 0; struct pack_list *min = NULL, *red, *pl;
struct llist *ignore;
struct strbuf idx_name = STRBUF_INIT;
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 7e5be05207..50d8811cea 100644
--- a/http.c
+++ b/http.c
@@ -2579,7 +2579,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index cc558f06cc..096a0cd6ba 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *repo, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(repo),
+ hash_to_hex_algop(hash, repo->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index 344da905c2..48d058699d 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *repo, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name`
2024-10-28 13:43 ` [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-10-28 16:14 ` Taylor Blau
2024-10-29 5:50 ` Jeff King
1 sibling, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 16:14 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 28, 2024 at 02:43:42PM +0100, Karthik Nayak wrote:
> ---
> builtin/fast-import.c | 8 ++++----
> builtin/index-pack.c | 4 ++--
> builtin/pack-redundant.c | 4 ++--
> http.c | 2 +-
> packfile.c | 9 ++++-----
> packfile.h | 3 ++-
> 6 files changed, 15 insertions(+), 15 deletions(-)
All looking very sensible, nicely done.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name`
2024-10-28 13:43 ` [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name` Karthik Nayak
2024-10-28 16:14 ` Taylor Blau
@ 2024-10-29 5:50 ` Jeff King
2024-10-29 12:45 ` karthik nayak
2024-10-29 17:33 ` Taylor Blau
1 sibling, 2 replies; 184+ messages in thread
From: Jeff King @ 2024-10-29 5:50 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me
On Mon, Oct 28, 2024 at 02:43:42PM +0100, Karthik Nayak wrote:
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index ffee7d3abd..f4892d7f37 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
> struct strbuf name = STRBUF_INIT;
> int keep_fd;
>
> - odb_pack_name(&name, pack_data->hash, "keep");
> + odb_pack_name(the_repository, &name, pack_data->hash, "keep");
Why not pack_data->repo here? It's always going to be set to
the_repository in this program, but I think minimizing the number of
references to it still has value.
> @@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
> if (close(keep_fd))
> die_errno("failed to write keep file");
>
> - odb_pack_name(&name, pack_data->hash, "pack");
> + odb_pack_name(the_repository, &name, pack_data->hash, "pack");
> if (finalize_object_file(pack_data->pack_name, name.buf))
> die("cannot store pack file");
>
> - odb_pack_name(&name, pack_data->hash, "idx");
> + odb_pack_name(the_repository, &name, pack_data->hash, "idx");
Likewise in both of these spots.
> if (finalize_object_file(curr_index_name, name.buf))
> die("cannot store index file");
> free((void *)curr_index_name);
> @@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
>
> for (k = 0; k < pack_id; k++) {
> struct packed_git *p = all_packs[k];
> - odb_pack_name(&name, p->hash, "keep");
> + odb_pack_name(p->repo, &name, p->hash, "keep");
This one does use p->repo. Good.
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index be2f99625e..eaefb41761 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
> if (pack_name)
> filename = derive_filename(pack_name, "pack", suffix, &name_buf);
> else
> - filename = odb_pack_name(&name_buf, hash, suffix);
> + filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
>
> fd = odb_pack_keep(filename);
> if (fd < 0) {
> @@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
> {
> if (!*final_name || strcmp(*final_name, curr_name)) {
> if (!*final_name)
> - *final_name = odb_pack_name(name, hash, ext);
> + *final_name = odb_pack_name(the_repository, name, hash, ext);
These two don't have a packed_git, so they use their own repo. Makes
sense.
> -int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) {
> +int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo) {
> int i; int i_still_use_this = 0; struct pack_list *min = NULL, *red, *pl;
> struct llist *ignore;
> struct strbuf idx_name = STRBUF_INIT;
> @@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
> pl = red = pack_list_difference(local_packs, min);
> while (pl) {
> printf("%s\n%s\n",
> - odb_pack_name(&idx_name, pl->pack->hash, "idx"),
> + odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
> pl->pack->pack_name);
> pl = pl->next;
> }
This one is using the "repo" variable passed to the main function. That
seems a little sketchy to me philosophically, though, because these
packs all came from a call to get_all_packs(the_repository). I think
the two will always be the same, but it feels like we should be using
pl->pack->repo here for consistency.
> diff --git a/http.c b/http.c
> index 7e5be05207..50d8811cea 100644
> --- a/http.c
> +++ b/http.c
> @@ -2579,7 +2579,7 @@ struct http_pack_request *new_direct_http_pack_request(
>
> preq->url = url;
>
> - odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
> + odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
> strbuf_addstr(&preq->tmpfile, ".temp");
> preq->packfile = fopen(preq->tmpfile.buf, "a");
> if (!preq->packfile) {
This one really ought to be using the packed_git we set up for the
matching idx file, but we won't have passed it through. And it's not
worth heavy refactoring just to get access to it, IMHO.
Earlier I mentioned that another helper could simplify many of these
sites a little. What I meant was this (on top of what's in your series):
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 9056447bd0..976cb1d77b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,19 +806,19 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(the_repository, &name, pack_data->hash, "keep");
+ pack_hashfile(pack_data, &name, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
write_or_die(keep_fd, keep_msg, strlen(keep_msg));
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(the_repository, &name, pack_data->hash, "pack");
+ pack_hashfile(pack_data, &name, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(the_repository, &name, pack_data->hash, "idx");
+ pack_hashfile(pack_data, &name, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(p->repo, &name, p->hash, "keep");
+ pack_hashfile(p, &name, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 7d6c47ffd9..d3b5e7e112 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
+ pack_hashfile(pl->pack, &idx_name, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/packfile.c b/packfile.c
index cfbfcdc2b8..d81a62eb84 100644
--- a/packfile.c
+++ b/packfile.c
@@ -46,6 +46,11 @@ char *odb_pack_name(struct repository *repo, struct strbuf *buf,
return buf->buf;
}
+char *pack_hashfile(struct packed_git *p, struct strbuf *out, const char *ext)
+{
+ return odb_pack_name(p->repo, out, p->hash, ext);
+}
+
static unsigned int pack_used_ctr;
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
diff --git a/packfile.h b/packfile.h
index 3409aef35d..43c19d7bba 100644
--- a/packfile.h
+++ b/packfile.h
@@ -32,6 +32,9 @@ struct pack_entry {
char *odb_pack_name(struct repository *repo, struct strbuf *buf,
const unsigned char *hash, const char *ext);
+/* Like odb_pack_name(), but pull repo and hash from existing packed_git. */
+char *pack_hashfile(struct packed_git *p, struct strbuf *out, const char *ext);
+
/*
* Return the basename of the packfile, omitting any containing directory
* (e.g., "pack-1234abcd[...].pack").
While coming up with the name, though, I had some second thoughts. The
interface implies that it's the way you should derive a pack-related
filename from a packed_git. But it really is mis-designed for that
purpose! The packed_git struct has "foo.pack" or similar in its
pack_name field, and the correct way to derive the .idx, .bitmap, .keep,
etc, is by string substitution. While we do tend to name packs
pack-$hash.pack, most of the code will happily work on
"some-arbitrary-name.pack". And that's why we have so few
odb_pack_name() calls in the first place.
IMHO the ones in fast-import should probably be doing that suffix
replacement instead (and probably we should have a decent helper to
facilitate that; you can grep for strip_suffix.*pack to see places that
could potentially use it).
All that said, I don't think it's worth derailing your series to deal
with that cleanup. That can come later if we want. And if we do that,
then the pack_hashfile() I suggested above would have no callers,
because it's the wrong approach.
I do think it's probably worth changing your series to use the
packed_git repo pointers we already have available, though (i.e., the
cases I pointed out inline above).
-Peff
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name`
2024-10-29 5:50 ` Jeff King
@ 2024-10-29 12:45 ` karthik nayak
2024-10-29 17:33 ` Taylor Blau
1 sibling, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-10-29 12:45 UTC (permalink / raw)
To: Jeff King; +Cc: git, me
[-- Attachment #1: Type: text/plain, Size: 9145 bytes --]
Jeff King <peff@peff.net> writes:
> On Mon, Oct 28, 2024 at 02:43:42PM +0100, Karthik Nayak wrote:
>
>> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
>> index ffee7d3abd..f4892d7f37 100644
>> --- a/builtin/fast-import.c
>> +++ b/builtin/fast-import.c
>> @@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
>> struct strbuf name = STRBUF_INIT;
>> int keep_fd;
>>
>> - odb_pack_name(&name, pack_data->hash, "keep");
>> + odb_pack_name(the_repository, &name, pack_data->hash, "keep");
>
> Why not pack_data->repo here? It's always going to be set to
> the_repository in this program, but I think minimizing the number of
> references to it still has value.
>
I tried to swap out 'the_repository' with local variables in most places
I could. Here, I totally missed 'pack_data', although it was right
there. Thanks, will swap out.
>> @@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
>> if (close(keep_fd))
>> die_errno("failed to write keep file");
>>
>> - odb_pack_name(&name, pack_data->hash, "pack");
>> + odb_pack_name(the_repository, &name, pack_data->hash, "pack");
>> if (finalize_object_file(pack_data->pack_name, name.buf))
>> die("cannot store pack file");
>>
>> - odb_pack_name(&name, pack_data->hash, "idx");
>> + odb_pack_name(the_repository, &name, pack_data->hash, "idx");
>
> Likewise in both of these spots.
>
>> if (finalize_object_file(curr_index_name, name.buf))
>> die("cannot store index file");
>> free((void *)curr_index_name);
>> @@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
>>
>> for (k = 0; k < pack_id; k++) {
>> struct packed_git *p = all_packs[k];
>> - odb_pack_name(&name, p->hash, "keep");
>> + odb_pack_name(p->repo, &name, p->hash, "keep");
>
> This one does use p->repo. Good.
>
>> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
>> index be2f99625e..eaefb41761 100644
>> --- a/builtin/index-pack.c
>> +++ b/builtin/index-pack.c
>> @@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
>> if (pack_name)
>> filename = derive_filename(pack_name, "pack", suffix, &name_buf);
>> else
>> - filename = odb_pack_name(&name_buf, hash, suffix);
>> + filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
>>
>> fd = odb_pack_keep(filename);
>> if (fd < 0) {
>> @@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
>> {
>> if (!*final_name || strcmp(*final_name, curr_name)) {
>> if (!*final_name)
>> - *final_name = odb_pack_name(name, hash, ext);
>> + *final_name = odb_pack_name(the_repository, name, hash, ext);
>
> These two don't have a packed_git, so they use their own repo. Makes
> sense.
>
>> -int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) {
>> +int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo) {
>> int i; int i_still_use_this = 0; struct pack_list *min = NULL, *red, *pl;
>> struct llist *ignore;
>> struct strbuf idx_name = STRBUF_INIT;
>> @@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
>> pl = red = pack_list_difference(local_packs, min);
>> while (pl) {
>> printf("%s\n%s\n",
>> - odb_pack_name(&idx_name, pl->pack->hash, "idx"),
>> + odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
>> pl->pack->pack_name);
>> pl = pl->next;
>> }
>
> This one is using the "repo" variable passed to the main function. That
> seems a little sketchy to me philosophically, though, because these
> packs all came from a call to get_all_packs(the_repository). I think
> the two will always be the same, but it feels like we should be using
> pl->pack->repo here for consistency.
>
Yeah, this seems more appropriate indeed. I will swap out.
>> diff --git a/http.c b/http.c
>> index 7e5be05207..50d8811cea 100644
>> --- a/http.c
>> +++ b/http.c
>> @@ -2579,7 +2579,7 @@ struct http_pack_request *new_direct_http_pack_request(
>>
>> preq->url = url;
>>
>> - odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
>> + odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
>> strbuf_addstr(&preq->tmpfile, ".temp");
>> preq->packfile = fopen(preq->tmpfile.buf, "a");
>> if (!preq->packfile) {
>
> This one really ought to be using the packed_git we set up for the
> matching idx file, but we won't have passed it through. And it's not
> worth heavy refactoring just to get access to it, IMHO.
>
>
> Earlier I mentioned that another helper could simplify many of these
> sites a little. What I meant was this (on top of what's in your series):
>
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index 9056447bd0..976cb1d77b 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -806,19 +806,19 @@ static char *keep_pack(const char *curr_index_name)
> struct strbuf name = STRBUF_INIT;
> int keep_fd;
>
> - odb_pack_name(the_repository, &name, pack_data->hash, "keep");
> + pack_hashfile(pack_data, &name, "keep");
> keep_fd = odb_pack_keep(name.buf);
> if (keep_fd < 0)
> die_errno("cannot create keep file");
> write_or_die(keep_fd, keep_msg, strlen(keep_msg));
> if (close(keep_fd))
> die_errno("failed to write keep file");
>
> - odb_pack_name(the_repository, &name, pack_data->hash, "pack");
> + pack_hashfile(pack_data, &name, "pack");
> if (finalize_object_file(pack_data->pack_name, name.buf))
> die("cannot store pack file");
>
> - odb_pack_name(the_repository, &name, pack_data->hash, "idx");
> + pack_hashfile(pack_data, &name, "idx");
> if (finalize_object_file(curr_index_name, name.buf))
> die("cannot store index file");
> free((void *)curr_index_name);
> @@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
>
> for (k = 0; k < pack_id; k++) {
> struct packed_git *p = all_packs[k];
> - odb_pack_name(p->repo, &name, p->hash, "keep");
> + pack_hashfile(p, &name, "keep");
> unlink_or_warn(name.buf);
> }
> strbuf_release(&name);
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index 7d6c47ffd9..d3b5e7e112 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c
> @@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
> pl = red = pack_list_difference(local_packs, min);
> while (pl) {
> printf("%s\n%s\n",
> - odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
> + pack_hashfile(pl->pack, &idx_name, "idx"),
> pl->pack->pack_name);
> pl = pl->next;
> }
> diff --git a/packfile.c b/packfile.c
> index cfbfcdc2b8..d81a62eb84 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -46,6 +46,11 @@ char *odb_pack_name(struct repository *repo, struct strbuf *buf,
> return buf->buf;
> }
>
> +char *pack_hashfile(struct packed_git *p, struct strbuf *out, const char *ext)
> +{
> + return odb_pack_name(p->repo, out, p->hash, ext);
> +}
> +
> static unsigned int pack_used_ctr;
> static unsigned int pack_mmap_calls;
> static unsigned int peak_pack_open_windows;
> diff --git a/packfile.h b/packfile.h
> index 3409aef35d..43c19d7bba 100644
> --- a/packfile.h
> +++ b/packfile.h
> @@ -32,6 +32,9 @@ struct pack_entry {
> char *odb_pack_name(struct repository *repo, struct strbuf *buf,
> const unsigned char *hash, const char *ext);
>
> +/* Like odb_pack_name(), but pull repo and hash from existing packed_git. */
> +char *pack_hashfile(struct packed_git *p, struct strbuf *out, const char *ext);
> +
> /*
> * Return the basename of the packfile, omitting any containing directory
> * (e.g., "pack-1234abcd[...].pack").
>
>
> While coming up with the name, though, I had some second thoughts. The
> interface implies that it's the way you should derive a pack-related
> filename from a packed_git. But it really is mis-designed for that
> purpose! The packed_git struct has "foo.pack" or similar in its
> pack_name field, and the correct way to derive the .idx, .bitmap, .keep,
> etc, is by string substitution. While we do tend to name packs
> pack-$hash.pack, most of the code will happily work on
> "some-arbitrary-name.pack". And that's why we have so few
> odb_pack_name() calls in the first place.
>
> IMHO the ones in fast-import should probably be doing that suffix
> replacement instead (and probably we should have a decent helper to
> facilitate that; you can grep for strip_suffix.*pack to see places that
> could potentially use it).
>
> All that said, I don't think it's worth derailing your series to deal
> with that cleanup. That can come later if we want. And if we do that,
> then the pack_hashfile() I suggested above would have no callers,
> because it's the wrong approach.
Thanks for the detailed explanation. Maybe we should mark this as
#leftoverbits for a future cleanup.
>
> I do think it's probably worth changing your series to use the
> packed_git repo pointers we already have available, though (i.e., the
> cases I pointed out inline above).
>
Yes, this I've incorporated into the next version. Thanks.
> -Peff
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name`
2024-10-29 5:50 ` Jeff King
2024-10-29 12:45 ` karthik nayak
@ 2024-10-29 17:33 ` Taylor Blau
1 sibling, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-29 17:33 UTC (permalink / raw)
To: Jeff King; +Cc: Karthik Nayak, git
On Tue, Oct 29, 2024 at 01:50:39AM -0400, Jeff King wrote:
> On Mon, Oct 28, 2024 at 02:43:42PM +0100, Karthik Nayak wrote:
>
> > diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> > index ffee7d3abd..f4892d7f37 100644
> > --- a/builtin/fast-import.c
> > +++ b/builtin/fast-import.c
> > @@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
> > struct strbuf name = STRBUF_INIT;
> > int keep_fd;
> >
> > - odb_pack_name(&name, pack_data->hash, "keep");
> > + odb_pack_name(the_repository, &name, pack_data->hash, "keep");
>
> Why not pack_data->repo here? It's always going to be set to
> the_repository in this program, but I think minimizing the number of
> references to it still has value.
Yeah, I had pointed out a similar thing when I looked at this patch in
the message above yours in this thread.
I think we reached the same conclusion that this isn't strictly
incorrect, because in all of the instances that I looked at, p->repo is
equal to the_repository, so from an external behavior perspective, the
two are equivalent choices.
But I agree that the point is to *use* p->repo and not rely directly on
'the_repository', so that your suggestion here is a good one.
> Earlier I mentioned that another helper could simplify many of these
> sites a little. What I meant was this (on top of what's in your series):
>
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index 9056447bd0..976cb1d77b 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -806,19 +806,19 @@ static char *keep_pack(const char *curr_index_name)
> struct strbuf name = STRBUF_INIT;
> int keep_fd;
>
> - odb_pack_name(the_repository, &name, pack_data->hash, "keep");
> + pack_hashfile(pack_data, &name, "keep");
> keep_fd = odb_pack_keep(name.buf);
> if (keep_fd < 0)
> die_errno("cannot create keep file");
> write_or_die(keep_fd, keep_msg, strlen(keep_msg));
> if (close(keep_fd))
> die_errno("failed to write keep file");
>
> - odb_pack_name(the_repository, &name, pack_data->hash, "pack");
> + pack_hashfile(pack_data, &name, "pack");
> if (finalize_object_file(pack_data->pack_name, name.buf))
> die("cannot store pack file");
>
> - odb_pack_name(the_repository, &name, pack_data->hash, "idx");
> + pack_hashfile(pack_data, &name, "idx");
> if (finalize_object_file(curr_index_name, name.buf))
> die("cannot store index file");
> free((void *)curr_index_name);
> @@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
>
> for (k = 0; k < pack_id; k++) {
> struct packed_git *p = all_packs[k];
> - odb_pack_name(p->repo, &name, p->hash, "keep");
> + pack_hashfile(p, &name, "keep");
> unlink_or_warn(name.buf);
> }
> strbuf_release(&name);
> diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
> index 7d6c47ffd9..d3b5e7e112 100644
> --- a/builtin/pack-redundant.c
> +++ b/builtin/pack-redundant.c
> @@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
> pl = red = pack_list_difference(local_packs, min);
> while (pl) {
> printf("%s\n%s\n",
> - odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
> + pack_hashfile(pl->pack, &idx_name, "idx"),
> pl->pack->pack_name);
> pl = pl->next;
> }
> diff --git a/packfile.c b/packfile.c
> index cfbfcdc2b8..d81a62eb84 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -46,6 +46,11 @@ char *odb_pack_name(struct repository *repo, struct strbuf *buf,
> return buf->buf;
> }
>
> +char *pack_hashfile(struct packed_git *p, struct strbuf *out, const char *ext)
> +{
> + return odb_pack_name(p->repo, out, p->hash, ext);
> +}
> +
> static unsigned int pack_used_ctr;
> static unsigned int pack_mmap_calls;
> static unsigned int peak_pack_open_windows;
> diff --git a/packfile.h b/packfile.h
> index 3409aef35d..43c19d7bba 100644
> --- a/packfile.h
> +++ b/packfile.h
> @@ -32,6 +32,9 @@ struct pack_entry {
> char *odb_pack_name(struct repository *repo, struct strbuf *buf,
> const unsigned char *hash, const char *ext);
>
> +/* Like odb_pack_name(), but pull repo and hash from existing packed_git. */
> +char *pack_hashfile(struct packed_git *p, struct strbuf *out, const char *ext);
> +
> /*
> * Return the basename of the packfile, omitting any containing directory
> * (e.g., "pack-1234abcd[...].pack").
>
>
> While coming up with the name, though, I had some second thoughts. The
> interface implies that it's the way you should derive a pack-related
> filename from a packed_git. But it really is mis-designed for that
> purpose! The packed_git struct has "foo.pack" or similar in its
> pack_name field, and the correct way to derive the .idx, .bitmap, .keep,
> etc, is by string substitution. While we do tend to name packs
> pack-$hash.pack, most of the code will happily work on
> "some-arbitrary-name.pack". And that's why we have so few
> odb_pack_name() calls in the first place.
>
> IMHO the ones in fast-import should probably be doing that suffix
> replacement instead (and probably we should have a decent helper to
> facilitate that; you can grep for strip_suffix.*pack to see places that
> could potentially use it).
>
> All that said, I don't think it's worth derailing your series to deal
> with that cleanup. That can come later if we want. And if we do that,
> then the pack_hashfile() I suggested above would have no callers,
> because it's the wrong approach.
Heh. I feel like you and I just discussed this on the list together a
couple of days ago. Indeed, there are quite a few that would benefit
from such a cleanup (there are even more if you search for
'strip_suffix.*idx', which would work similarly).
> I do think it's probably worth changing your series to use the
> packed_git repo pointers we already have available, though (i.e., the
> cases I pointed out inline above).
But yeah, we can take that up as a secondary step on top of this series
if we end up wanting to do that in the future.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 5/8] packfile: pass down repository to `has_object[_kept]_pack`
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
` (3 preceding siblings ...)
2024-10-28 13:43 ` [PATCH v2 4/8] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:28 ` Taylor Blau
2024-10-28 13:43 ` [PATCH v2 6/8] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (2 subsequent siblings)
7 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..2b2816c243 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(the_repository, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(the_repository, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..1d483bdf37 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(the_repository, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 096a0cd6ba..c6d7ed38f6 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *repo, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(repo, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(repo, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 48d058699d..ac4f2210c5 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *repo, const struct object_id *oid);
+int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 5/8] packfile: pass down repository to `has_object[_kept]_pack`
2024-10-28 13:43 ` [PATCH v2 5/8] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-10-28 16:28 ` Taylor Blau
2024-10-29 16:03 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 16:28 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 28, 2024 at 02:43:43PM +0100, Karthik Nayak wrote:
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 0800714267..2b2816c243 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
> return 0;
> if (ignore_packed_keep_in_core && p->pack_keep_in_core)
> return 0;
> - if (has_object_kept_pack(oid, flags))
> + if (has_object_kept_pack(the_repository, oid, flags))
Do we want to use p->repo here instead of the_repository? I think the
answer is "yes" since in this function we are given a pack "p" and want
to determine if the given object contained in "p" is useful to pack. If
not, we want to search for it among other packs here, likely within the
same repository.
(Again, probably a moot point here since this is all going to be
the_repository anyway, but just thinking aloud...).
> }
>
> @@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
>
> static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
> {
> - return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
> + return !has_object_kept_pack(the_repository, &obj->oid, IN_CORE_KEEP_PACKS);
Here we don't know what pack "obj" is contained in, which makes sense
since this is a traversal callback, not something that is iterating over
the contents of a particular pack or similar. So using the_repository is
right here.
Although... should we be using to_pack->repo here over the_repository
(in builtin/pack-objects.c)? The rest of the code definitely does *not*
do that, but I think probably should.
> static int cruft_include_check(struct commit *commit, void *data)
> diff --git a/diff.c b/diff.c
> index dceac20d18..1d483bdf37 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
> * objects however would tend to be slower as they need
> * to be individually opened and inflated.
> */
> - if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
> + if (!FAST_WORKING_DIRECTORY && !want_file &&
> + has_object_pack(the_repository, oid))
> return 0;
>
> /*
> diff --git a/list-objects.c b/list-objects.c
> index 985d008799..31236a8dc9 100644
> --- a/list-objects.c
> +++ b/list-objects.c
> @@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
> {
> if (!ctx->show_object)
> return;
> - if (ctx->revs->unpacked && has_object_pack(&object->oid))
> + if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
> + &object->oid))
> return;
>
> ctx->show_object(object, name, ctx->show_data);
> diff --git a/pack-bitmap.c b/pack-bitmap.c
> index 4fa9dfc771..d34ba9909a 100644
> --- a/pack-bitmap.c
> +++ b/pack-bitmap.c
> @@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
> bitmap_unset(result, i);
>
> for (i = 0; i < eindex->count; ++i) {
> - if (has_object_pack(&eindex->objects[i]->oid))
> + if (has_object_pack(the_repository, &eindex->objects[i]->oid))
Interesting. I think the_repository in practice is fine here, but I
might have expected something like bitmap_git->p->repo, or the
equivalent for the MIDX case.
So I was going to suggest something like:
static struct repository *bitmap_repo(const struct bitmap_index *bitmap_git)
{
if (bitmap_is_midx(bitmap_git))
return bitmap_git->midx->repo;
return bitmap_git->pack->repo;
}
and then rewriting this as:
if (has_object_pack(bitmap_repo(bitmap_git), &eindex->objects[i]->oid))
, but we can't do that, because the MIDX structure does not know what
repository it belongs to, only the object_dir it resides in!
And I think that causes wrinkles earlier in your series that I didn't
think of at the time when reviewing, because it seems odd in retrospect
that, e.g. we have something like:
load_multi_pack_index(the_repository->objects->odb->path, ...);
where we pass in the object_dir path directly, but other functions like
prepare_midx_pack() take in a 'struct repository *'.
I wonder if we should be initializing the MIDX with a repository
pointer, so that it knows what repository it belongs to. I suspect that
we will still have to pass in a separate string indicating the
object_dir, likely because of the --object-dir quirk I mentioned
earlier.
But my main thought here is that we should be able to infer from a
'struct bitmap_index *' what repository it belongs to instead of using
'the_repository' here directly.
The rest all looks quite reasonable to me.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v2 5/8] packfile: pass down repository to `has_object[_kept]_pack`
2024-10-28 16:28 ` Taylor Blau
@ 2024-10-29 16:03 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-10-29 16:03 UTC (permalink / raw)
To: Taylor Blau; +Cc: git
[-- Attachment #1: Type: text/plain, Size: 6060 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Oct 28, 2024 at 02:43:43PM +0100, Karthik Nayak wrote:
>> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
>> index 0800714267..2b2816c243 100644
>> --- a/builtin/pack-objects.c
>> +++ b/builtin/pack-objects.c
>> @@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
>> return 0;
>> if (ignore_packed_keep_in_core && p->pack_keep_in_core)
>> return 0;
>> - if (has_object_kept_pack(oid, flags))
>> + if (has_object_kept_pack(the_repository, oid, flags))
>
> Do we want to use p->repo here instead of the_repository? I think the
> answer is "yes" since in this function we are given a pack "p" and want
> to determine if the given object contained in "p" is useful to pack. If
> not, we want to search for it among other packs here, likely within the
> same repository.
>
> (Again, probably a moot point here since this is all going to be
> the_repository anyway, but just thinking aloud...).
>
I don't think it is a moot point at all. We do want to move up the
layers and clean up usage of global variables. Reducing the work
required definitely gets us there faster.
>> }
>>
>> @@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
>>
>> static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
>> {
>> - return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
>> + return !has_object_kept_pack(the_repository, &obj->oid, IN_CORE_KEEP_PACKS);
>
> Here we don't know what pack "obj" is contained in, which makes sense
> since this is a traversal callback, not something that is iterating over
> the contents of a particular pack or similar. So using the_repository is
> right here.
>
> Although... should we be using to_pack->repo here over the_repository
> (in builtin/pack-objects.c)? The rest of the code definitely does *not*
> do that, but I think probably should.
I think so too. I won't change existing code, but it makes sense to do it
in our patches. Will amend.
>
>> static int cruft_include_check(struct commit *commit, void *data)
>> diff --git a/diff.c b/diff.c
>> index dceac20d18..1d483bdf37 100644
>> --- a/diff.c
>> +++ b/diff.c
>> @@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
>> * objects however would tend to be slower as they need
>> * to be individually opened and inflated.
>> */
>> - if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
>> + if (!FAST_WORKING_DIRECTORY && !want_file &&
>> + has_object_pack(the_repository, oid))
>> return 0;
>>
>> /*
>> diff --git a/list-objects.c b/list-objects.c
>> index 985d008799..31236a8dc9 100644
>> --- a/list-objects.c
>> +++ b/list-objects.c
>> @@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
>> {
>> if (!ctx->show_object)
>> return;
>> - if (ctx->revs->unpacked && has_object_pack(&object->oid))
>> + if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
>> + &object->oid))
>> return;
>>
>> ctx->show_object(object, name, ctx->show_data);
>> diff --git a/pack-bitmap.c b/pack-bitmap.c
>> index 4fa9dfc771..d34ba9909a 100644
>> --- a/pack-bitmap.c
>> +++ b/pack-bitmap.c
>> @@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
>> bitmap_unset(result, i);
>>
>> for (i = 0; i < eindex->count; ++i) {
>> - if (has_object_pack(&eindex->objects[i]->oid))
>> + if (has_object_pack(the_repository, &eindex->objects[i]->oid))
>
> Interesting. I think the_repository in practice is fine here, but I
> might have expected something like bitmap_git->p->repo, or the
> equivalent for the MIDX case.
>
> So I was going to suggest something like:
>
> static struct repository *bitmap_repo(const struct bitmap_index *bitmap_git)
> {
> if (bitmap_is_midx(bitmap_git))
> return bitmap_git->midx->repo;
> return bitmap_git->pack->repo;
> }
>
> and then rewriting this as:
>
> if (has_object_pack(bitmap_repo(bitmap_git), &eindex->objects[i]->oid))
>
> , but we can't do that, because the MIDX structure does not know what
> repository it belongs to, only the object_dir it resides in!
>
Exactly, I agree the repository should be obtained from `struct bitmap_index`.
Unfortunately, as you noted, we can't do that in the current state.
> And I think that causes wrinkles earlier in your series that I didn't
> think of at the time when reviewing, because it seems odd in retrospect
> that, e.g. we have something like:
>
> load_multi_pack_index(the_repository->objects->odb->path, ...);
>
> where we pass in the object_dir path directly, but other functions like
> prepare_midx_pack() take in a 'struct repository *'.
>
> I wonder if we should be initializing the MIDX with a repository
> pointer, so that it knows what repository it belongs to. I suspect that
> we will still have to pass in a separate string indicating the
> object_dir, likely because of the --object-dir quirk I mentioned
> earlier.
>
> But my main thought here is that we should be able to infer from a
> 'struct bitmap_index *' what repository it belongs to instead of using
> 'the_repository' here directly.
>
I also think it makes sense to progress toward the goal of removing global
variables by focusing primarily on a single file/subsystem at a time,
working directionally from the bottom layers up toward the top.
This patch series focuses on the `packfile.c` file, so we clean up the
file, remove associated usages of the global variable, and try to
follow some form of cleanup as we go. But for other files, it is okay to
still rely on the global variables. Gradually, as the cleanup reaches
those files, we can turn our focus to them.
So here, it would be nice to have MIDX have a repository pointer too,
but I think we'd be overshooting trying to refactor that in this series.
So I'd leave it as is and focus on that when we get to cleaning up
`pack-bitmap.c`.
> The rest all looks quite reasonable to me.
>
> Thanks,
> Taylor
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 6/8] packfile: pass down repository to `for_each_packed_object`
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
` (4 preceding siblings ...)
2024-10-28 13:43 ` [PATCH v2 5/8] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 13:43 ` [PATCH v2 7/8] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
2024-10-28 13:43 ` [PATCH v2 8/8] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
7 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 2b2816c243..3635b2c84c 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(the_repository, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(the_repository,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index 8b31072b09..6f9f4276e6 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index c6d7ed38f6..74ac6d793b 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *repo, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(repo)) {
+ for_each_packed_object(repo, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index ac4f2210c5..c1883e60ef 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *repo, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v2 7/8] config: make `delta_base_cache_limit` a non-global variable
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
` (5 preceding siblings ...)
2024-10-28 13:43 ` [PATCH v2 6/8] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:38 ` me
2024-10-28 13:43 ` [PATCH v2 8/8] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
7 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 5 ++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..9a10eb58bc 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ unsigned long delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -205,6 +207,7 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
@@ -416,7 +419,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 74ac6d793b..93b0d6af31 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *repo, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1496,7 +1498,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1878,7 +1885,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 7/8] config: make `delta_base_cache_limit` a non-global variable
2024-10-28 13:43 ` [PATCH v2 7/8] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-10-28 16:38 ` me
2024-10-29 16:07 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: me @ 2024-10-28 16:38 UTC (permalink / raw)
Cc: git
On Mon, Oct 28, 2024 at 02:43:45PM +0100, Karthik Nayak wrote:
> The `delta_base_cache_limit` variable is a global config variable used
> by multiple subsystems. Let's make this non-global, by adding this
> variable to the stack of each of the subsystems where it is used.
>
> In `gc.c` we add it to the `gc_config` struct and also the constructor
> function. In `index-pack.c` we add it to the `pack_idx_option` struct
> and its constructor. Finally, in `packfile.c` we dynamically retrieve
> this value from the repository config, since the value is only used once
> in the entire subsystem.
OK. Perhaps I am not quite following why this change is necessary, at
least in the context of the rest of this series. But let's read on...
> @@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
> else
> opts->flags &= ~WRITE_REV;
Not a huge deal, and not the fault of your patch here, but the
if(!strcmp(k, "pack.writereverseindex")) block should terminate with a
"return 0".
> + if (!strcmp(k, "core.deltabasecachelimit")) {
> + opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
> + return 0;
But here you do 'return 0;' at the end of handling the
'core.deltabasecachelimit' configuration value. Good.
> diff --git a/config.c b/config.c
> index a11bb85da3..728ef98e42 100644
> --- a/config.c
> +++ b/config.c
> @@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
> return 0;
> }
>
> - if (!strcmp(var, "core.deltabasecachelimit")) {
> - delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
> - return 0;
> - }
> -
This is safe to drop from git_default_core_config() because the static
variable from environment.h is gone, so nobody is accidentally reading
a zero'd value.
> diff --git a/pack-objects.h b/pack-objects.h
> index b9898a4e64..3f6f504203 100644
> --- a/pack-objects.h
> +++ b/pack-objects.h
> @@ -7,7 +7,8 @@
>
> struct repository;
>
> -#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
> +#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
> +#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
Adding DEFAULT_DELTA_BASE_CACHE_LIMIT makes sense, and I assume the
diff on the line above is clang-format noise to keep the two
declarations aligned or something?
The rest looks good.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v2 7/8] config: make `delta_base_cache_limit` a non-global variable
2024-10-28 16:38 ` me
@ 2024-10-29 16:07 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-10-29 16:07 UTC (permalink / raw)
To: me; +Cc: git
[-- Attachment #1: Type: text/plain, Size: 2724 bytes --]
me@ttaylorr.com writes:
> On Mon, Oct 28, 2024 at 02:43:45PM +0100, Karthik Nayak wrote:
>> The `delta_base_cache_limit` variable is a global config variable used
>> by multiple subsystems. Let's make this non-global, by adding this
>> variable to the stack of each of the subsystems where it is used.
>>
>> In `gc.c` we add it to the `gc_config` struct and also the constructor
>> function. In `index-pack.c` we add it to the `pack_idx_option` struct
>> and its constructor. Finally, in `packfile.c` we dynamically retrieve
>> this value from the repository config, since the value is only used once
>> in the entire subsystem.
>
> OK. Perhaps I am not quite following why this change is necessary, at
> least in the context of the rest of this series. But let's read on...
>
Ah, well, as you know by now, it is to clean up the usage of the global
config state in packfile.c. I think I briefly go over it in the cover letter,
but like you mentioned in the next patch, I'll amend and add some
details here too.
>> @@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
>> else
>> opts->flags &= ~WRITE_REV;
>
> Not a huge deal, and not the fault of your patch here, but the
> if(!strcmp(k, "pack.writereverseindex")) block should terminate with a
> "return 0".
>
>> + if (!strcmp(k, "core.deltabasecachelimit")) {
>> + opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
>> + return 0;
>
> But here you do 'return 0;' at the end of handling the
> 'core.deltabasecachelimit' configuration value. Good.
>
>> diff --git a/config.c b/config.c
>> index a11bb85da3..728ef98e42 100644
>> --- a/config.c
>> +++ b/config.c
>> @@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
>> return 0;
>> }
>>
>> - if (!strcmp(var, "core.deltabasecachelimit")) {
>> - delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
>> - return 0;
>> - }
>> -
>
> This is safe to drop from git_default_core_config() because the static
> variable from environment.h is gone, so nobody is accidentally reading
> an zero'd value.
>
>> diff --git a/pack-objects.h b/pack-objects.h
>> index b9898a4e64..3f6f504203 100644
>> --- a/pack-objects.h
>> +++ b/pack-objects.h
>> @@ -7,7 +7,8 @@
>>
>> struct repository;
>>
>> -#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
>> +#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
>> +#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
>
> Adding DEFAULT_DELTA_BASE_CACHE_LIMIT makes sense, and I assume the
> diff on the line above is clang-format noise to keep the two
> declarations aligned or something?
>
Yup, indeed, that is the point.
> The rest looks good.
>
> Thanks,
> Taylor
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v2 8/8] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
` (6 preceding siblings ...)
2024-10-28 13:43 ` [PATCH v2 7/8] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-10-28 13:43 ` Karthik Nayak
2024-10-28 16:45 ` Taylor Blau
7 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-28 13:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, me
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used in
this file, let's change them from being global config variables to
local variables for the subsystem.
We do this by introducing a new local `packfile_config` struct in
`packfile.c` and also adding the required function to parse the said
config. We then use this within `packfile.c` to obtain the variables.
With this, we rid `packfile.c` of all global variable usage and this
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 +--
config.c | 17 ------------
environment.c | 2 --
packfile.c | 60 +++++++++++++++++++++++++++++++++++++------
packfile.h | 2 +-
5 files changed, 55 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index f4892d7f37..9056447bd0 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 93b0d6af31..cfbfcdc2b8 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -27,6 +26,17 @@
#include "config.h"
#include "pack-objects.h"
+struct packfile_config {
+ unsigned long packed_git_window_size;
+ unsigned long packed_git_limit;
+};
+
+#define PACKFILE_CONFIG_INIT \
+{ \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
+}
+
char *odb_pack_name(struct repository *repo, struct strbuf *buf,
const unsigned char *hash, const char *ext)
{
@@ -48,15 +58,44 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+static int packfile_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
{
+ struct packfile_config *config = cb;
+
+ if (!strcmp(var, "core.packedgitwindowsize")) {
+ int pgsz_x2 = getpagesize() * 2;
+ config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
+
+ /* This value must be multiple of (pagesize * 2) */
+ config->packed_git_window_size /= pgsz_x2;
+ if (config->packed_git_window_size < 1)
+ config->packed_git_window_size = 1;
+ config->packed_git_window_size *= pgsz_x2;
+ return 0;
+ }
+
+ if (!strcmp(var, "core.packedgitlimit")) {
+ config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
+ return 0;
+ }
+
+ return git_default_config(var, value, ctx, cb);
+}
+
+
+void pack_report(struct repository *repo)
+{
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ repo_config(repo, packfile_config, &config);
+
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(config.packed_git_window_size),
+ sz_fmt(config.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -652,20 +691,25 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ size_t window_align;
off_t len;
+ repo_config(p->repo, packfile_config, &config);
+ window_align = config.packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > config.packed_git_window_size)
+ len = config.packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (config.packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index c1883e60ef..3409aef35d 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 8/8] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-28 13:43 ` [PATCH v2 8/8] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-10-28 16:45 ` Taylor Blau
2024-10-29 16:09 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-28 16:45 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git
On Mon, Oct 28, 2024 at 02:43:46PM +0100, Karthik Nayak wrote:
> The variables `packed_git_window_size` and `packed_git_limit` are global
> config variables used in the `packfile.c` file. Since it is only used in
> this file, let's change it from being a global config variable to a
> local variable for the subsystem.
>
> We do this by introducing a new local `packfile_config` struct in
> `packfile.c` and also adding the required function to parse the said
> config. We then use this within `packfile.c` to obtain the variables.
>
> With this, we rid `packfile.c` from all global variable usage and this
> means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
> the file.
Ahh. Now the motivation of the previous patch is clearer. Have you
considered hinting at the motivation here in the previous commit message
(e.g., "this gets us part of the way towards ...")?
> diff --git a/environment.c b/environment.c
> index 8e5022c282..8389a27270 100644
> --- a/environment.c
> +++ b/environment.c
> @@ -49,8 +49,6 @@ int fsync_object_files = -1;
> int use_fsync = -1;
> enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
> enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
> -size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
> -size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
Very satisfying :-).
> +struct packfile_config {
> + unsigned long packed_git_window_size;
> + unsigned long packed_git_limit;
> +};
> +
> +#define PACKFILE_CONFIG_INIT \
> +{ \
> + .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
> + .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
s/, /, /
> +static int packfile_config(const char *var, const char *value,
> + const struct config_context *ctx, void *cb)
> {
> + struct packfile_config *config = cb;
> +
> + if (!strcmp(var, "core.packedgitwindowsize")) {
> + int pgsz_x2 = getpagesize() * 2;
> + config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
> +
> + /* This value must be multiple of (pagesize * 2) */
> + config->packed_git_window_size /= pgsz_x2;
> + if (config->packed_git_window_size < 1)
> + config->packed_git_window_size = 1;
> + config->packed_git_window_size *= pgsz_x2;
> + return 0;
> + }
> +
> + if (!strcmp(var, "core.packedgitlimit")) {
> + config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
> + return 0;
> + }
> +
> + return git_default_config(var, value, ctx, cb);
> +}
I get that this was copy/pasted from elsewhere, but it would be nice to
replace the "every if statement ends in 'return 0' to keep them mutually
exclusive" with else if statements instead:
--- 8< ---
diff --git a/packfile.c b/packfile.c
index cfbfcdc2b8..c8af29bf0a 100644
--- a/packfile.c
+++ b/packfile.c
@@ -72,15 +72,11 @@ static int packfile_config(const char *var, const char *value,
if (config->packed_git_window_size < 1)
config->packed_git_window_size = 1;
config->packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
- if (!strcmp(var, "core.packedgitlimit")) {
+ } else if (!strcmp(var, "core.packedgitlimit")) {
config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
+ } else {
+ return git_default_config(var, value, ctx, cb);
}
-
- return git_default_config(var, value, ctx, cb);
}
--- >8 ---
> +
> +
Extra newline here (after the definition of packfile_config())?
The rest all looks good.
Thanks,
Taylor
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v2 8/8] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-28 16:45 ` Taylor Blau
@ 2024-10-29 16:09 ` karthik nayak
2024-10-29 17:48 ` Taylor Blau
0 siblings, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-10-29 16:09 UTC (permalink / raw)
To: Taylor Blau; +Cc: git
[-- Attachment #1: Type: text/plain, Size: 3857 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Oct 28, 2024 at 02:43:46PM +0100, Karthik Nayak wrote:
>> The variables `packed_git_window_size` and `packed_git_limit` are global
>> config variables used in the `packfile.c` file. Since it is only used in
>> this file, let's change it from being a global config variable to a
>> local variable for the subsystem.
>>
>> We do this by introducing a new local `packfile_config` struct in
>> `packfile.c` and also adding the required function to parse the said
>> config. We then use this within `packfile.c` to obtain the variables.
>>
>> With this, we rid `packfile.c` from all global variable usage and this
>> means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
>> the file.
>
> Ahh. Now the motivation of the previous patch is clearer. Have you
> considered hinting at the motivation here in the previous commit message
> (e.g., "this gets us part of the way towards ...")?
>
Indeed, will add.
>> diff --git a/environment.c b/environment.c
>> index 8e5022c282..8389a27270 100644
>> --- a/environment.c
>> +++ b/environment.c
>> @@ -49,8 +49,6 @@ int fsync_object_files = -1;
>> int use_fsync = -1;
>> enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
>> enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
>> -size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
>> -size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
>
> Very satisfying :-).
>
>> +struct packfile_config {
>> + unsigned long packed_git_window_size;
>> + unsigned long packed_git_limit;
>> +};
>> +
>> +#define PACKFILE_CONFIG_INIT \
>> +{ \
>> + .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
>> + .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
>
> s/, /, /
>
>> +static int packfile_config(const char *var, const char *value,
>> + const struct config_context *ctx, void *cb)
>> {
>> + struct packfile_config *config = cb;
>> +
>> + if (!strcmp(var, "core.packedgitwindowsize")) {
>> + int pgsz_x2 = getpagesize() * 2;
>> + config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
>> +
>> + /* This value must be multiple of (pagesize * 2) */
>> + config->packed_git_window_size /= pgsz_x2;
>> + if (config->packed_git_window_size < 1)
>> + config->packed_git_window_size = 1;
>> + config->packed_git_window_size *= pgsz_x2;
>> + return 0;
>> + }
>> +
>> + if (!strcmp(var, "core.packedgitlimit")) {
>> + config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
>> + return 0;
>> + }
>> +
>> + return git_default_config(var, value, ctx, cb);
>> +}
>
> I get that this was copy/pasted from elsewhere, but it would be nice to
> replace the "every if statement ends in 'return 0' to keep them mutually
> exclusive" with else if statements instead:
>
> --- 8< ---
> diff --git a/packfile.c b/packfile.c
> index cfbfcdc2b8..c8af29bf0a 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -72,15 +72,11 @@ static int packfile_config(const char *var, const char *value,
> if (config->packed_git_window_size < 1)
> config->packed_git_window_size = 1;
> config->packed_git_window_size *= pgsz_x2;
> - return 0;
> - }
> -
> - if (!strcmp(var, "core.packedgitlimit")) {
> + } else if (!strcmp(var, "core.packedgitlimit")) {
> config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
> - return 0;
> + } else {
> + return git_default_config(var, value, ctx, cb);
> }
> -
> - return git_default_config(var, value, ctx, cb);
> }
> --- >8 ---
>
Thanks, will patch this in. I try and avoid such things to mostly make
it easier to review code block movements. But here I think it is indeed
nicer to change for the better.
>> +
>> +
>
> Extra newline here (after the definition of packfile_config())?
>
Oops!
> The rest all looks good.
>
> Thanks,
> Taylor
Thanks for the thorough review. Appreciate it!
Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (21 preceding siblings ...)
2024-10-28 13:43 ` [PATCH v2 0/8] " Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
` (8 more replies)
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (6 subsequent siblings)
29 siblings, 9 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all uses of it by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`: we add the repository
field to the `packed_git` struct and use it to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Patches 7 and 8 deal with global config values. These values are
localized. Finally, the last patch adds the repository to the
`multi_pack_index` struct to clean up `pack-bitmap.c`.
This series is based on master: 6a11438f43 (The fifth batch, 2024-10-25),
with 'jk/dumb-http-finalize' merged in. I found no issues merging this with seen,
but since these patches cover a lot of files, there might be some conflicts.
Changes in v3:
- Improved commit messages: the first commit now talks about how the packed_git
struct could also be part of the alternates of a repository, and the 7th commit
talks about the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++--
builtin/gc.c | 5 +-
builtin/index-pack.c | 20 +++--
builtin/pack-objects.c | 11 ++-
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 -----
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 +-
pack-bitmap.c | 97 +++++++++++++--------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 182 ++++++++++++++++++++++++++-------------
packfile.h | 18 ++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
revision.c | 13 +--
tag.c | 2 +-
34 files changed, 285 insertions(+), 192 deletions(-)
Range-diff against v2:
1: 26d2461cc3 ! 1: 5afb9af0af packfile: add repository to struct `packed_git`
@@ Commit message
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
+ We do need to consider that a pack file could be part of the alternates
+ of a repository, but considering that we only have one repository struct
+ and also that we currently anyways use 'the_repository'. We should be
+ OK with this change.
+
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callee to pass the repository object down the
levels.
+ Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
## builtin/fast-import.c ##
@@ builtin/fast-import.c: static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
-+ p->repo = the_repository;
++ p->r = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ builtin/fast-import.c: static void end_packfile(void)
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
-+ new_p = add_packed_git(the_repository, idx_name, strlen(idx_name), 1);
++ new_p = add_packed_git(pack_data->r, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
@@ midx.c: int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
## object-store-ll.h ##
@@
- #include "hashmap.h"
- #include "object.h"
- #include "list.h"
-+#include "repository.h"
- #include "thread-utils.h"
- #include "oidset.h"
+ struct oidmap;
+ struct oidtree;
+ struct strbuf;
++struct repository;
+ struct object_directory {
+ struct object_directory *next;
@@ object-store-ll.h: struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packed file belongs to */
-+ struct repository *repo;
++ struct repository *r;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
@@ packfile.c: uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
}
-static struct packed_git *alloc_packed_git(int extra)
-+static struct packed_git *alloc_packed_git(struct repository *repo, int extra)
++static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
-+ p->repo = repo;
++ p->r = r;
return p;
}
@@ packfile.c: static char *pack_path_from_idx(const char *idx_path)
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
-+struct packed_git *parse_pack_index(struct repository *repo,
-+ unsigned char *sha1, const char *idx_path)
++struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
++ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
-+ struct packed_git *p = alloc_packed_git(repo, alloc);
++ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ packfile.c: void unuse_pack(struct pack_window **w_cursor)
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
-+struct packed_git *add_packed_git(struct repository *repo, const char *path,
++struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
@@ packfile.c: struct packed_git *add_packed_git(const char *path, size_t path_len,
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
-+ p = alloc_packed_git(repo, alloc);
++ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ packfile.h: const char *pack_basename(struct packed_git *p);
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
-+struct packed_git *parse_pack_index(struct repository *repo,
-+ unsigned char *sha1, const char *idx_path);
++struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
++ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ packfile.h: void close_pack(struct packed_git *);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
-+struct packed_git *add_packed_git(struct repository *repo, const char *path,
++struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
2: ca03355686 ! 2: 5350b4f9fb packfile: use `repository` from `packed_git` directly
@@ Commit message
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
+ Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
## packfile.c ##
@@ packfile.c: static int check_packed_git_idx(const char *path, struct packed_git
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
++ const unsigned int hashsz = p->r->hash_algo->rawsz;
if (fd < 0)
return -1;
-@@ packfile.c: struct packed_git *parse_pack_index(struct repository *repo,
+@@ packfile.c: struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
-+ hashcpy(p->hash, sha1, p->repo->hash_algo);
++ hashcpy(p->hash, sha1, p->r->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ packfile.c: static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
-+ for (p = current->repo->objects->packed_git; p; p = p->next)
++ for (p = current->r->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ packfile.c: static int open_packed_git_1(struct packed_git *p)
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
-+ const unsigned hashsz = p->repo->hash_algo->rawsz;
++ const unsigned hashsz = p->r->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ packfile.c: static int open_packed_git_1(struct packed_git *p)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
-+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
++ if (!hasheq(hash, idx_hash, p->r->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
-+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
++ if (offset > (p->pack_size - p->r->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
-@@ packfile.c: struct packed_git *add_packed_git(struct repository *repo, const char *path,
+@@ packfile.c: struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
@@ packfile.c: struct packed_git *add_packed_git(struct repository *repo, const cha
/*
* Make sure a corresponding .pack file exists and that
-@@ packfile.c: struct packed_git *add_packed_git(struct repository *repo, const char *path,
+@@ packfile.c: struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
-+ if (path_len < repo->hash_algo->hexsz ||
-+ get_oid_hex_algop(path + path_len - repo->hash_algo->hexsz, &oid,
-+ repo->hash_algo))
-+ hashclr(p->hash, repo->hash_algo);
++ if (path_len < r->hash_algo->hexsz ||
++ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
++ r->hash_algo))
++ hashclr(p->hash, r->hash_algo);
+ else
-+ memcpy(p->hash, oid.hash, repo->hash_algo->rawsz);
++ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ packfile.c: off_t get_delta_base(struct packed_git *p,
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
-+ oidread(&oid, base_info, p->repo->hash_algo);
++ oidread(&oid, base_info, p->r->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
-+ *curpos += p->repo->hash_algo->rawsz;
++ *curpos += p->r->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ packfile.c: static int get_delta_base_oid(struct packed_git *p,
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
-+ oidread(oid, base, p->repo->hash_algo);
++ oidread(oid, base, p->r->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ packfile.c: int packed_object_info(struct repository *r, struct packed_git *p,
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
-+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
++ oidclr(oi->delta_base_oid, p->r->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ packfile.c: int bsearch_pack(const struct object_id *oid, const struct packed_gi
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
++ const unsigned int hashsz = p->r->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ packfile.c: int nth_packed_object_id(struct object_id *oid,
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
++ const unsigned int hashsz = p->r->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ packfile.c: int nth_packed_object_id(struct object_id *oid,
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
-+ p->repo->hash_algo);
++ p->r->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
-+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
++ oidread(oid, index + st_mult(hashsz, n), p->r->hash_algo);
}
return 0;
}
@@ packfile.c: void check_pack_index_ptr(const struct packed_git *p, const void *vp
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
++ const unsigned int hashsz = p->r->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ packfile.c: int for_each_object_in_pack(struct packed_git *p,
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
-+ if (load_pack_revindex(p->repo, p))
++ if (load_pack_revindex(p->r, p))
return -1;
}
@@ packfile.c: static int add_promisor_object(const struct object_id *oid,
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
-+ obj = lookup_object(pack->repo, oid);
++ obj = lookup_object(pack->r, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
-+ obj = parse_object(pack->repo, oid);
++ obj = parse_object(pack->r, oid);
}
if (!obj)
-: ---------- > 3: 5b975cb6d6 packfile: pass `repository` to static function in the file
4: 73ba9945a7 ! 4: 13a166fcca packfile: pass down repository to `odb_pack_name`
@@ builtin/fast-import.c: static char *keep_pack(const char *curr_index_name)
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
-+ odb_pack_name(the_repository, &name, pack_data->hash, "keep");
++ odb_pack_name(pack_data->r, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ builtin/fast-import.c: static char *keep_pack(const char *curr_index_name)
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
-+ odb_pack_name(the_repository, &name, pack_data->hash, "pack");
++ odb_pack_name(pack_data->r, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
-+ odb_pack_name(the_repository, &name, pack_data->hash, "idx");
++ odb_pack_name(pack_data->r, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ builtin/fast-import.c: static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
-+ odb_pack_name(p->repo, &name, p->hash, "keep");
++ odb_pack_name(p->r, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
@@ builtin/index-pack.c: static void rename_tmp_packfile(const char **final_name,
ext, *final_name);
## builtin/pack-redundant.c ##
-@@ builtin/pack-redundant.c: static void load_all(void)
- }
- }
-
--int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo UNUSED) {
-+int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, struct repository *repo) {
- int i; int i_still_use_this = 0; struct pack_list *min = NULL, *red, *pl;
- struct llist *ignore;
- struct strbuf idx_name = STRBUF_INIT;
@@ builtin/pack-redundant.c: int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
-+ odb_pack_name(repo, &idx_name, pl->pack->hash, "idx"),
++ odb_pack_name(pl->pack->r, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
@@ packfile.c
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
-+char *odb_pack_name(struct repository *repo, struct strbuf *buf,
++char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
-+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(repo),
-+ hash_to_hex_algop(hash, repo->hash_algo), ext);
++ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
++ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
@@ packfile.h: struct pack_entry {
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
-+char *odb_pack_name(struct repository *repo, struct strbuf *buf,
++char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
5: 4e883a4d1c ! 5: 1fac06f19e packfile: pass down repository to `has_object[_kept]_pack`
@@ builtin/pack-objects.c: static int want_found_object(const struct object_id *oid
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
-+ if (has_object_kept_pack(the_repository, oid, flags))
++ if (has_object_kept_pack(p->r, oid, flags))
return 0;
}
@@ builtin/pack-objects.c: static void show_cruft_commit(struct commit *commit, voi
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
-+ return !has_object_kept_pack(the_repository, &obj->oid, IN_CORE_KEEP_PACKS);
++ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
@@ diff.c: static int reuse_worktree_file(struct index_state *istate,
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
-+ has_object_pack(the_repository, oid))
++ has_object_pack(istate->repo, oid))
return 0;
/*
@@ packfile.c: int find_kept_pack_entry(struct repository *r,
}
-int has_object_pack(const struct object_id *oid)
-+int has_object_pack(struct repository *repo, const struct object_id *oid)
++int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
-+ return find_pack_entry(repo, oid, &e);
++ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
-+int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
++int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
-+ return find_kept_pack_entry(repo, oid, flags, &e);
++ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
@@ packfile.h: const struct packed_git *has_packed_and_bad(struct repository *, con
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
-+int has_object_pack(struct repository *repo, const struct object_id *oid);
-+int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
++int has_object_pack(struct repository *r, const struct object_id *oid);
++int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
6: 7c599e16f6 ! 6: a5fb3b1a4a packfile: pass down repository to `for_each_packed_object`
@@ builtin/pack-objects.c: static void show_object__ma_allow_promisor(struct object
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
-+ is_promisor_object(the_repository, &obj->oid))
++ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ builtin/pack-objects.c: static int add_object_in_unpacked_pack(const struct obje
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
-+ if (for_each_packed_object(the_repository,
++ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
@@ packfile.c: static int add_promisor_object(const struct object_id *oid,
}
-int is_promisor_object(const struct object_id *oid)
-+int is_promisor_object(struct repository *repo, const struct object_id *oid)
++int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
@@ packfile.c: static int add_promisor_object(const struct object_id *oid,
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
-+ if (repo_has_promisor_remote(repo)) {
-+ for_each_packed_object(repo, add_promisor_object,
++ if (repo_has_promisor_remote(r)) {
++ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
## packfile.h ##
-@@ packfile.h: int has_object_kept_pack(struct repository *repo, const struct object_id *oid,
+@@ packfile.h: int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
-+int is_promisor_object(struct repository *repo, const struct object_id *oid);
++int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
7: 89ee3f48ce ! 7: 6e5951ceea config: make `delta_base_cache_limit` a non-global variable
@@ Commit message
this value from the repository config, since the value is only used once
in the entire subsystem.
+ These changes are made to remove the usage of `delta_base_cache_limit`
+ as a global variable in `packfile.c`. This brings us one step closer to
+ removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
+ which we complete in the next patch.
+
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
## builtin/gc.c ##
@@ packfile.c
+#include "config.h"
+#include "pack-objects.h"
- char *odb_pack_name(struct repository *repo, struct strbuf *buf,
+ char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ packfile.c: void clear_delta_base_cache(void)
}
8: e99b1dad54 ! 8: ec9061fbbd config: make `packed_git_(limit|window_size)` non-global variables
@@ Commit message
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
+ Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
## builtin/fast-import.c ##
@@ packfile.c
+#define PACKFILE_CONFIG_INIT \
+{ \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
-+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
++ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
+}
+
- char *odb_pack_name(struct repository *repo, struct strbuf *buf,
+ char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
{
@@ packfile.c: static size_t pack_mapped;
@@ packfile.c: static size_t pack_mapped;
-void pack_report(void)
+static int packfile_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
- {
++{
+ struct packfile_config *config = cb;
+
+ if (!strcmp(var, "core.packedgitwindowsize")) {
@@ packfile.c: static size_t pack_mapped;
+ config->packed_git_window_size = 1;
+ config->packed_git_window_size *= pgsz_x2;
+ return 0;
-+ }
-+
-+ if (!strcmp(var, "core.packedgitlimit")) {
++ } else if (!strcmp(var, "core.packedgitlimit")) {
+ config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
+ return 0;
++ } else {
++ return git_default_config(var, value, ctx, cb);
+ }
-+
-+ return git_default_config(var, value, ctx, cb);
+}
+
-+
+void pack_report(struct repository *repo)
-+{
+ {
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ repo_config(repo, packfile_config, &config);
+
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
+ size_t window_align;
off_t len;
-+ repo_config(p->repo, packfile_config, &config);
++ repo_config(p->r, packfile_config, &config);
+ window_align = config.packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(p))
3: 1f8ef580e5 ! 9: c0b386412d packfile: pass `repository` to static function in the file
@@ Metadata
Author: Karthik Nayak <karthik.188@gmail.com>
## Commit message ##
- packfile: pass `repository` to static function in the file
+ midx: add repository to `multi_pack_index` struct
- Some of the static functions in the `packfile.c` access global
- variables, which can simply be avoided by passing the `repository`
- struct down to them. Let's do that.
+ The `multi_pack_index` struct represents the MIDX for a repository.
+ Here, we add a pointer to the repository in this struct, allowing direct
+ use of the repository variable without relying on the global
+ `the_repository` struct.
+
+ With this addition, we can determine the repository associated with a
+ `bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
+ or a `multi_pack_index`, both of which have direct repository
+ references. To support this, we introduce a static helper function,
+ `bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
+ `bitmap_index`.
+
+ With this, we clear up all usages of `the_repository` within
+ `pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
+ definition. This brings us another step closer to removing all global
+ variable usage.
+
+ Although this change also opens up the potential to clean up `midx.c`,
+ doing so would require additional refactoring to pass the repository
+ struct to functions where the MIDX struct is created: a task better
+ suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
- ## packfile.c ##
-@@ packfile.c: static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
- *accept_windows_inuse = has_windows_inuse;
+ ## midx.c ##
+@@ midx.c: static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
+ m->data = midx_map;
+ m->data_len = midx_size;
+ m->local = local;
++ m->r = the_repository;
+
+ m->signature = get_be32(m->data);
+ if (m->signature != MIDX_SIGNATURE)
+
+ ## midx.h ##
+@@ midx.h: struct multi_pack_index {
+
+ const char **pack_names;
+ struct packed_git **packs;
++
++ struct repository *r;
++
+ char object_dir[FLEX_ARRAY];
+ };
+
+
+ ## pack-bitmap.c ##
+@@
+-#define USE_THE_REPOSITORY_VARIABLE
+-
+ #include "git-compat-util.h"
+ #include "commit.h"
+ #include "gettext.h"
+@@ pack-bitmap.c: static uint32_t bitmap_num_objects(struct bitmap_index *index)
+ return index->pack->num_objects;
}
--static int close_one_pack(void)
-+static int close_one_pack(struct repository *repo)
++static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
++{
++ if (bitmap_is_midx(bitmap_git))
++ return bitmap_git->midx->r;
++ return bitmap_git->pack->r;
++}
++
+ static int load_bitmap_header(struct bitmap_index *index)
{
- struct packed_git *p, *lru_p = NULL;
- struct pack_window *mru_w = NULL;
- int accept_windows_inuse = 1;
+ struct bitmap_disk_header *header = (void *)index->map;
+- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
++ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
++
++ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
-- for (p = the_repository->objects->packed_git; p; p = p->next) {
-+ for (p = repo->objects->packed_git; p; p = p->next) {
- if (p->pack_fd == -1)
- continue;
- find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
-@@ packfile.c: static int open_packed_git_1(struct packed_git *p)
- pack_max_fds = 1;
+- if (index->map_size < header_size + the_hash_algo->rawsz)
++ if (index->map_size < header_size + hash_algo->rawsz)
+ return error(_("corrupted bitmap index (too small)"));
+
+ if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
+@@ pack-bitmap.c: static int load_bitmap_header(struct bitmap_index *index)
+ {
+ uint32_t flags = ntohs(header->options);
+ size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
+- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
++ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
+
+ if ((flags & BITMAP_OPT_FULL_DAG) == 0)
+ BUG("unsupported options for bitmap index file "
+@@ pack-bitmap.c: static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
+ if (bitmap_git->pack || bitmap_git->midx) {
+ struct strbuf buf = STRBUF_INIT;
+ get_midx_filename(&buf, midx->object_dir);
+- trace2_data_string("bitmap", the_repository,
++ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra midx bitmap file", buf.buf);
+ close(fd);
+ strbuf_release(&buf);
+@@ pack-bitmap.c: static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
+ goto cleanup;
+
+ if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
+- the_repository->hash_algo)) {
++ bitmap_repo(bitmap_git)->hash_algo)) {
+ error(_("checksum doesn't match in MIDX and bitmap"));
+ goto cleanup;
+ }
+@@ pack-bitmap.c: static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
+ }
+
+ for (i = 0; i < bitmap_git->midx->num_packs; i++) {
+- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
++ if (prepare_midx_pack(bitmap_repo(bitmap_git),
++ bitmap_git->midx,
++ i)) {
+ warning(_("could not open pack %s"),
+ bitmap_git->midx->pack_names[i]);
+ goto cleanup;
+@@ pack-bitmap.c: static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
-- while (pack_max_fds <= pack_open_fds && close_one_pack())
-+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
- ; /* nothing */
+ if (bitmap_git->pack || bitmap_git->midx) {
+- trace2_data_string("bitmap", the_repository,
+- "ignoring extra bitmap file", packfile->pack_name);
++ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
++ "ignoring extra bitmap file",
++ packfile->pack_name);
+ close(fd);
+ return -1;
+ }
+@@ pack-bitmap.c: static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
+ return -1;
+ }
- p->pack_fd = git_open(p->pack_name);
-@@ packfile.c: static int open_packed_git(struct packed_git *p)
- return -1;
+- trace2_data_string("bitmap", the_repository, "opened bitmap file",
+- packfile->pack_name);
++ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
++ "opened bitmap file", packfile->pack_name);
+ return 0;
}
--static int in_window(struct pack_window *win, off_t offset)
-+static int in_window(struct repository *repo, struct pack_window *win,
-+ off_t offset)
+@@ pack-bitmap.c: struct bitmap_index *prepare_bitmap_git(struct repository *r)
+
+ struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
+ {
+- struct repository *r = the_repository;
++ struct repository *r = midx->r;
+ struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
+
+ if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
+@@ pack-bitmap.c: static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
- /* We must promise at least one full hash after the
- * offset is available from this window, otherwise the offset
-@@ packfile.c: static int in_window(struct pack_window *win, off_t offset)
+ struct bitmap_boundary_cb cb;
+ struct object_list *root;
++ struct repository *repo;
+ unsigned int i;
+ unsigned int tmp_blobs, tmp_trees, tmp_tags;
+ int any_missing = 0;
+@@ pack-bitmap.c: static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
+ cb.base = bitmap_new();
+ object_array_init(&cb.boundary);
+
++ repo = bitmap_repo(bitmap_git);
++
+ revs->ignore_missing_links = 1;
+
+ if (bitmap_git->pseudo_merges.nr) {
+@@ pack-bitmap.c: static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
+ * revision walk to (a) OR in any bitmaps that are UNINTERESTING
+ * between the tips and boundary, and (b) record the boundary.
*/
- off_t win_off = win->offset;
- return win_off <= offset
-- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
-+ && (offset + repo->hash_algo->rawsz) <= (win_off + win->len);
+- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
++ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
+ if (prepare_revision_walk(revs))
+ die("revision walk setup failed");
+- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
++ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
+
+- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
++ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
+ revs->boundary = 1;
+ traverse_commit_list_filtered(revs,
+ show_boundary_commit,
+ show_boundary_object,
+ &cb, NULL);
+ revs->boundary = 0;
+- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
++ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
+
+ revs->blob_objects = tmp_blobs;
+ revs->tree_objects = tmp_trees;
+@@ pack-bitmap.c: static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
+ /*
+ * Then add the boundary commit(s) as fill-in traversal tips.
+ */
+- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
++ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
+ for (i = 0; i < cb.boundary.nr; i++) {
+ struct object *obj = cb.boundary.objects[i].item;
+ if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
+@@ pack-bitmap.c: static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
+ }
+ if (revs->pending.nr)
+ cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
+- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
++ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
+
+ cleanup:
+ object_array_clear(&cb.boundary);
+@@ pack-bitmap.c: static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
+ ofs = pack_pos_to_offset(pack, pos);
+ }
+
+- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
++ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
++ &oi) < 0) {
+ struct object_id oid;
+ nth_bitmap_object_oid(bitmap_git, &oid,
+ pack_pos_to_index(pack, pos));
+@@ pack-bitmap.c: static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
+ } else {
+ struct eindex *eindex = &bitmap_git->ext_index;
+ struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
+- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
++ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
++ &oi, 0) < 0)
+ die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
+ }
+
+@@ pack-bitmap.c: static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
+ bitmap_unset(result, i);
+
+ for (i = 0; i < eindex->count; ++i) {
+- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
++ if (has_object_pack(bitmap_repo(bitmap_git),
++ &eindex->objects[i]->oid))
+ bitmap_unset(result, objects_nr + i);
+ }
}
+@@ pack-bitmap.c: struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
+ struct bitmap *haves_bitmap = NULL;
- unsigned char *use_pack(struct packed_git *p,
-@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
- if (offset < 0)
- die(_("offset before end of packfile (broken .idx?)"));
-
-- if (!win || !in_window(win, offset)) {
-+ if (!win || !in_window(p->repo, win, offset)) {
- if (win)
- win->inuse_cnt--;
- for (win = p->windows; win; win = win->next) {
-- if (in_window(win, offset))
-+ if (in_window(p->repo, win, offset))
- break;
+ struct bitmap_index *bitmap_git;
++ struct repository *repo;
+
+ /*
+ * We can't do pathspec limiting with bitmaps, because we don't know
+@@ pack-bitmap.c: struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
+ if (!use_boundary_traversal)
+ object_array_clear(&revs->pending);
+
++ repo = bitmap_repo(bitmap_git);
++
+ if (haves) {
+- if (use_boundary_traversal) {
+- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
++ if (use_boundary_traversal)
++ {
++ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
+ haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
+- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+- } else {
+- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
++ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
++ }
++ else
++ {
++ trace2_region_enter("pack-bitmap", "haves/classic", repo);
+ revs->ignore_missing_links = 1;
+ haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
+ reset_revision_walk();
+ revs->ignore_missing_links = 0;
+- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
++ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
- if (!win) {
+
+ if (!haves_bitmap)
+@@ pack-bitmap.c: struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
+ object_list_free(&wants);
+ object_list_free(&haves);
+
+- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
++ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
+ pseudo_merges_satisfied_nr);
+- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
++ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
+ pseudo_merges_cascades_nr);
+- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
++ trace2_data_intmax("bitmap", repo, "bitmap/hits",
+ existing_bitmaps_hits_nr);
+- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
++ trace2_data_intmax("bitmap", repo, "bitmap/misses",
+ existing_bitmaps_misses_nr);
+- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
++ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
+ roots_with_bitmaps_nr);
+- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
++ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
+ roots_without_bitmaps_nr);
+
+ return bitmap_git;
+@@ pack-bitmap.c: void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
+ struct bitmap **reuse_out,
+ int multi_pack_reuse)
+ {
+- struct repository *r = the_repository;
++ struct repository *r = bitmap_repo(bitmap_git);
+ struct bitmapped_pack *packs = NULL;
+ struct bitmap *result = bitmap_git->result;
+ struct bitmap *reuse;
+@@ pack-bitmap.c: int rebuild_bitmap(const uint32_t *reposition,
+ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
+ struct packing_data *mapping)
+ {
+- struct repository *r = the_repository;
++ struct repository *r = bitmap_repo(bitmap_git);
+ uint32_t i, num_objects;
+ uint32_t *reposition;
+
+@@ pack-bitmap.c: static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
+ st_add(bitmap_num_objects(bitmap_git), i)))
+ continue;
+
+- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
++ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
++ &oi, 0) < 0)
+ die(_("unable to get disk usage of '%s'"),
+ oid_to_hex(&obj->oid));
+
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v3 1/9] packfile: add repository to struct `packed_git`
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 20:00 ` Taylor Blau
2024-10-30 14:32 ` [PATCH v3 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (7 subsequent siblings)
8 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and also that we currently use 'the_repository' anyway, we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..f8d3d7e0c7 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->r = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->r, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 510332ab04..7e5be05207 100644
--- a/http.c
+++ b/http.c
@@ -2437,7 +2437,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..e8a22ab5fc 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo dentoes the repository this packed file belongs to */
+ struct repository *r;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..1423f23f57 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->r = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v3 1/9] packfile: add repository to struct `packed_git`
2024-10-30 14:32 ` [PATCH v3 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-10-30 20:00 ` Taylor Blau
0 siblings, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-10-30 20:00 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff
On Wed, Oct 30, 2024 at 03:32:26PM +0100, Karthik Nayak wrote:
> [...]
>
> We do need to consider that a pack file could be part of the alternates
> of a repository, but considering that we only have one repository struct
> and also that we currently anyways use 'the_repository'. We should be
> OK with this change.
Nicely explained.
> diff --git a/object-store-ll.h b/object-store-ll.h
> index 53b8e693b1..e8a22ab5fc 100644
> --- a/object-store-ll.h
> +++ b/object-store-ll.h
> @@ -10,6 +10,7 @@
> struct oidmap;
> struct oidtree;
> struct strbuf;
> +struct repository;
>
> struct object_directory {
> struct object_directory *next;
> @@ -135,6 +136,10 @@ struct packed_git {
> */
> const uint32_t *mtimes_map;
> size_t mtimes_size;
> +
> + /* repo dentoes the repository this packed file belongs to */
> + struct repository *r;
> +
Hmm. What I meant in my earlier suggestion was that we should leave the
member of the struct called "repo", but change the name only in function
arguments.
Sorry to split hairs, but I am somewhat opposed to having such a short
variable name in a struct. In either event, the comment should be made
consistent with the variable name.
> /* something like ".git/objects/pack/xxxxx.pack" */
> char pack_name[FLEX_ARRAY]; /* more */
> };
> diff --git a/packfile.c b/packfile.c
> index 9560f0a33c..1423f23f57 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
> return ntohl(level1_ofs[value]);
> }
>
> -static struct packed_git *alloc_packed_git(int extra)
> +static struct packed_git *alloc_packed_git(struct repository *r, int extra)
This spot I would leave alone.
> {
> struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
> memset(p, 0, sizeof(*p));
> p->pack_fd = -1;
> + p->r = r;
And this spot I would change to:
p->repo = r;
The rest is looking good.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v3 2/9] packfile: use `repository` from `packed_git` directly
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
` (6 subsequent siblings)
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 1423f23f57..ecb284fd98 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->r->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->r->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->r->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->r->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->r->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->r->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->r->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->r->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->r->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->r->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->r->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->r->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->r->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->r->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->r->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->r, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->r, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->r, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 3/9] packfile: pass `repository` to static function in the file
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (5 subsequent siblings)
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
Some of the static functions in `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index ecb284fd98..a391474031 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->r))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->r, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->r, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 4/9] packfile: pass down repository to `odb_pack_name`
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (2 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (4 subsequent siblings)
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index f8d3d7e0c7..2ca8198153 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->r, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->r, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->r, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->r, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..01d1362c5b 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->r, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 7e5be05207..50d8811cea 100644
--- a/http.c
+++ b/http.c
@@ -2579,7 +2579,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index a391474031..ce701255dd 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 5/9] packfile: pass down repository to `has_object[_kept]_pack`
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (3 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (3 subsequent siblings)
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..ceb7e76b10 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->r, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index ce701255dd..3894646573 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 6/9] packfile: pass down repository to `for_each_packed_object`
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (4 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (2 subsequent siblings)
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
`the_repository` to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ceb7e76b10..755d521440 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index e8a22ab5fc..ce73d9a670 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index 3894646573..9eca5a86a7 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (5 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 5 ++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..9a10eb58bc 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ unsigned long delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -205,6 +207,7 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
@@ -416,7 +419,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 9eca5a86a7..56a5c55a5d 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1496,7 +1498,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1878,7 +1885,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (6 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 14:32 ` [PATCH v3 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
8 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used
in this file, let's change them from being global config variables to
local variables for the subsystem.
We do this by introducing a new local `packfile_config` struct in
`packfile.c` and also adding the required function to parse the said
config. We then use this within `packfile.c` to obtain the variables.
With this, we rid `packfile.c` of all global variable usage, which
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 +--
config.c | 17 -------------
environment.c | 2 --
packfile.c | 57 +++++++++++++++++++++++++++++++++++++------
packfile.h | 2 +-
5 files changed, 52 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 2ca8198153..c159cc584b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 56a5c55a5d..426606585f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -27,6 +26,17 @@
#include "config.h"
#include "pack-objects.h"
+struct packfile_config {
+ unsigned long packed_git_window_size;
+ unsigned long packed_git_limit;
+};
+
+#define PACKFILE_CONFIG_INIT \
+{ \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
+}
+
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
{
@@ -48,15 +58,41 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+static int packfile_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
+{
+ struct packfile_config *config = cb;
+
+ if (!strcmp(var, "core.packedgitwindowsize")) {
+ int pgsz_x2 = getpagesize() * 2;
+ config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
+
+ /* This value must be multiple of (pagesize * 2) */
+ config->packed_git_window_size /= pgsz_x2;
+ if (config->packed_git_window_size < 1)
+ config->packed_git_window_size = 1;
+ config->packed_git_window_size *= pgsz_x2;
+ return 0;
+ } else if (!strcmp(var, "core.packedgitlimit")) {
+ config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
+ return 0;
+ } else {
+ return git_default_config(var, value, ctx, cb);
+ }
+}
+
+void pack_report(struct repository *repo)
{
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ repo_config(repo, packfile_config, &config);
+
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(config.packed_git_window_size),
+ sz_fmt(config.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -652,20 +688,25 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ size_t window_align;
off_t len;
+ repo_config(p->r, packfile_config, &config);
+ window_align = config.packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > config.packed_git_window_size)
+ len = config.packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (config.packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v3 9/9] midx: add repository to `multi_pack_index` struct
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (7 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-10-30 14:32 ` Karthik Nayak
2024-10-30 20:13 ` Taylor Blau
8 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-30 14:32 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition. This brings us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 97 +++++++++++++++++++++++++++++++--------------------
3 files changed, 64 insertions(+), 37 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..7a34473010 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->r = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..7d39fb24e9 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *r;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..ef9958b96e 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->r;
+ return bitmap_git->pack->r;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->r;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,23 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
- if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ if (use_boundary_traversal)
+ {
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
- } else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
+ }
+ else
+ {
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2047,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2278,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2814,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2970,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v3 9/9] midx: add repository to `multi_pack_index` struct
2024-10-30 14:32 ` [PATCH v3 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-10-30 20:13 ` Taylor Blau
2024-10-31 9:34 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-30 20:13 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff
On Wed, Oct 30, 2024 at 03:32:34PM +0100, Karthik Nayak wrote:
> The `multi_pack_index` struct represents the MIDX for a repository.
> Here, we add a pointer to the repository in this struct, allowing direct
> use of the repository variable without relying on the global
> `the_repository` struct.
>
> With this addition, we can determine the repository associated with a
> `bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
> or a `multi_pack_index`, both of which have direct repository
> references. To support this, we introduce a static helper function,
> `bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
> `bitmap_index`.
>
> With this, we clear up all usages of `the_repository` within
> `pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
> definition. Bringing us another step closer to remove all global
> variable usage.
>
> Although this change also opens up the potential to clean up `midx.c`,
> doing so would require additional refactoring to pass the repository
> struct to functions where the MIDX struct is created: a task better
> suited for future patches.
>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> ---
> midx.c | 1 +
> midx.h | 3 ++
> pack-bitmap.c | 97 +++++++++++++++++++++++++++++++--------------------
> 3 files changed, 64 insertions(+), 37 deletions(-)
>
> diff --git a/midx.c b/midx.c
> index 8edb75f51d..7a34473010 100644
> --- a/midx.c
> +++ b/midx.c
> @@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
> m->data = midx_map;
> m->data_len = midx_size;
> m->local = local;
> + m->r = the_repository;
Same note here about calling this 'r' rather than 'repo'.
I do wonder if it creates any awkwardness to have the_repository
assigned here unconditionally when we do specify the object_dir. I think
it's OK so long as we don't start replacing 'm->object_dir' with
'm->repo->objects->odb->path'.
> @@ -1980,18 +1997,23 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
> if (!use_boundary_traversal)
> object_array_clear(&revs->pending);
>
> + repo = bitmap_repo(bitmap_git);
> +
> if (haves) {
> - if (use_boundary_traversal) {
> - trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
> + if (use_boundary_traversal)
> + {
> + trace2_region_enter("pack-bitmap", "haves/boundary", repo);
> haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
> - trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
> - } else {
> - trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
> + trace2_region_leave("pack-bitmap", "haves/boundary", repo);
> + }
> + else
> + {
> + trace2_region_enter("pack-bitmap", "haves/classic", repo);
> revs->ignore_missing_links = 1;
> haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
> reset_revision_walk();
> revs->ignore_missing_links = 0;
> - trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
> + trace2_region_leave("pack-bitmap", "haves/classic", repo);
> }
Odd braces?
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v3 9/9] midx: add repository to `multi_pack_index` struct
2024-10-30 20:13 ` Taylor Blau
@ 2024-10-31 9:34 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-10-31 9:34 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, peff
[-- Attachment #1: Type: text/plain, Size: 3376 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Wed, Oct 30, 2024 at 03:32:34PM +0100, Karthik Nayak wrote:
>> The `multi_pack_index` struct represents the MIDX for a repository.
>> Here, we add a pointer to the repository in this struct, allowing direct
>> use of the repository variable without relying on the global
>> `the_repository` struct.
>>
>> With this addition, we can determine the repository associated with a
>> `bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
>> or a `multi_pack_index`, both of which have direct repository
>> references. To support this, we introduce a static helper function,
>> `bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
>> `bitmap_index`.
>>
>> With this, we clear up all usages of `the_repository` within
>> `pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
>> definition. Bringing us another step closer to remove all global
>> variable usage.
>>
>> Although this change also opens up the potential to clean up `midx.c`,
>> doing so would require additional refactoring to pass the repository
>> struct to functions where the MIDX struct is created: a task better
>> suited for future patches.
>>
>> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
>> ---
>> midx.c | 1 +
>> midx.h | 3 ++
>> pack-bitmap.c | 97 +++++++++++++++++++++++++++++++--------------------
>> 3 files changed, 64 insertions(+), 37 deletions(-)
>>
>> diff --git a/midx.c b/midx.c
>> index 8edb75f51d..7a34473010 100644
>> --- a/midx.c
>> +++ b/midx.c
>> @@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
>> m->data = midx_map;
>> m->data_len = midx_size;
>> m->local = local;
>> + m->r = the_repository;
>
> Same note here about calling this 'r' rather than 'repo'.
>
My bad, I'll fix it in the next version.
> I do wonder if it creates any awkwardness to have the_repository
> assigned here unconditionally when we do specify the object_dir. I think
> it's OK so long as we don't start replacing 'm->object_dir' with
> 'm->repo->objects->odb->path'.
>
>> @@ -1980,18 +1997,23 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
>> if (!use_boundary_traversal)
>> object_array_clear(&revs->pending);
>>
>> + repo = bitmap_repo(bitmap_git);
>> +
>> if (haves) {
>> - if (use_boundary_traversal) {
>> - trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
>> + if (use_boundary_traversal)
>> + {
>> + trace2_region_enter("pack-bitmap", "haves/boundary", repo);
>> haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
>> - trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
>> - } else {
>> - trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
>> + trace2_region_leave("pack-bitmap", "haves/boundary", repo);
>> + }
>> + else
>> + {
>> + trace2_region_enter("pack-bitmap", "haves/classic", repo);
>> revs->ignore_missing_links = 1;
>> haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
>> reset_revision_walk();
>> revs->ignore_missing_links = 0;
>> - trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
>> + trace2_region_leave("pack-bitmap", "haves/classic", repo);
>> }
>
> Odd braces?
>
Indeed. Will fix this too.
> Thanks,
> Taylor
Thanks for the review.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (22 preceding siblings ...)
2024-10-30 14:32 ` [PATCH v3 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
` (9 more replies)
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (5 subsequent siblings)
29 siblings, 10 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all usages of it by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`: we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
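The following two patches deal with global config values. These values are
localized. Finally, the last patch adds the repository to the
`multi_pack_index` struct, which allows cleaning up `pack-bitmap.c`.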
This series is based on master: 6a11438f43 (The fifth batch, 2024-10-25),
with 'jk/dumb-http-finalize' merged in. I found no issues merging this with seen,
but since these patches cover a lot of files, there might be some conflicts.
Changes in v4:
- Renamed the repository field within `packed_git` and `multi_pack_index` from
`r` to `repo`, while keeping function parameters to be `r`.
- Fixed bad braces.
Changes in v3:
- Improved commit messages: the first commit now talks about how the packed_git
struct could also be part of the alternates of a repository, and the 7th commit
now talks about the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++--
builtin/gc.c | 5 +-
builtin/index-pack.c | 20 +++--
builtin/pack-objects.c | 11 ++-
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 -----
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 +-
pack-bitmap.c | 90 +++++++++++--------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 182 ++++++++++++++++++++++++++-------------
packfile.h | 18 ++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
revision.c | 13 +--
tag.c | 2 +-
34 files changed, 280 insertions(+), 190 deletions(-)
Range-diff against v3:
1: 5afb9af0af ! 1: b3d518e998 packfile: add repository to struct `packed_git`
@@ builtin/fast-import.c: static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
-+ p->r = the_repository;
++ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ builtin/fast-import.c: static void end_packfile(void)
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
-+ new_p = add_packed_git(pack_data->r, idx_name, strlen(idx_name), 1);
++ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
@@ object-store-ll.h: struct packed_git {
size_t mtimes_size;
+
+ /* repo denotes the repository this packed file belongs to */
-+ struct repository *r;
++ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
@@ packfile.c: uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
-+ p->r = r;
++ p->repo = r;
return p;
}
2: 5350b4f9fb ! 2: bb9d9aa744 packfile: use `repository` from `packed_git` directly
@@ packfile.c: static int check_packed_git_idx(const char *path, struct packed_git
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->r->hash_algo->rawsz;
++ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ packfile.c: struct packed_git *parse_pack_index(struct repository *r, unsigned c
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
-+ hashcpy(p->hash, sha1, p->r->hash_algo);
++ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ packfile.c: static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
-+ for (p = current->r->objects->packed_git; p; p = p->next)
++ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ packfile.c: static int open_packed_git_1(struct packed_git *p)
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
-+ const unsigned hashsz = p->r->hash_algo->rawsz;
++ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ packfile.c: static int open_packed_git_1(struct packed_git *p)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
-+ if (!hasheq(hash, idx_hash, p->r->hash_algo))
++ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
-+ if (offset > (p->pack_size - p->r->hash_algo->rawsz))
++ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ packfile.c: off_t get_delta_base(struct packed_git *p,
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
-+ oidread(&oid, base_info, p->r->hash_algo);
++ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
-+ *curpos += p->r->hash_algo->rawsz;
++ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ packfile.c: static int get_delta_base_oid(struct packed_git *p,
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
-+ oidread(oid, base, p->r->hash_algo);
++ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ packfile.c: int packed_object_info(struct repository *r, struct packed_git *p,
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
-+ oidclr(oi->delta_base_oid, p->r->hash_algo);
++ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ packfile.c: int bsearch_pack(const struct object_id *oid, const struct packed_gi
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->r->hash_algo->rawsz;
++ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ packfile.c: int nth_packed_object_id(struct object_id *oid,
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->r->hash_algo->rawsz;
++ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ packfile.c: int nth_packed_object_id(struct object_id *oid,
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
-+ p->r->hash_algo);
++ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
-+ oidread(oid, index + st_mult(hashsz, n), p->r->hash_algo);
++ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ packfile.c: void check_pack_index_ptr(const struct packed_git *p, const void *vp
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
-+ const unsigned int hashsz = p->r->hash_algo->rawsz;
++ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ packfile.c: int for_each_object_in_pack(struct packed_git *p,
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
-+ if (load_pack_revindex(p->r, p))
++ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ packfile.c: static int add_promisor_object(const struct object_id *oid,
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
-+ obj = lookup_object(pack->r, oid);
++ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
-+ obj = parse_object(pack->r, oid);
++ obj = parse_object(pack->repo, oid);
}
if (!obj)
3: 5b975cb6d6 ! 3: d5df50fa36 packfile: pass `repository` to static function in the file
@@ packfile.c: static int open_packed_git_1(struct packed_git *p)
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
-+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->r))
++ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
-+ if (!win || !in_window(p->r, win, offset)) {
++ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
-+ if (in_window(p->r, win, offset))
++ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
4: 13a166fcca ! 4: 0107801c3b packfile: pass down repository to `odb_pack_name`
@@ builtin/fast-import.c: static char *keep_pack(const char *curr_index_name)
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
-+ odb_pack_name(pack_data->r, &name, pack_data->hash, "keep");
++ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ builtin/fast-import.c: static char *keep_pack(const char *curr_index_name)
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
-+ odb_pack_name(pack_data->r, &name, pack_data->hash, "pack");
++ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
-+ odb_pack_name(pack_data->r, &name, pack_data->hash, "idx");
++ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ builtin/fast-import.c: static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
-+ odb_pack_name(p->r, &name, p->hash, "keep");
++ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
@@ builtin/pack-redundant.c: int cmd_pack_redundant(int argc, const char **argv, co
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
-+ odb_pack_name(pl->pack->r, &idx_name, pl->pack->hash, "idx"),
++ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
5: 1fac06f19e ! 5: 2d7608a367 packfile: pass down repository to `has_object[_kept]_pack`
@@ builtin/pack-objects.c: static int want_found_object(const struct object_id *oid
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
-+ if (has_object_kept_pack(p->r, oid, flags))
++ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
6: a5fb3b1a4a = 6: 2c84026d02 packfile: pass down repository to `for_each_packed_object`
7: 6e5951ceea = 7: 84b89c8a0e config: make `delta_base_cache_limit` a non-global variable
8: ec9061fbbd ! 8: 5bbdc7124d config: make `packed_git_(limit|window_size)` non-global variables
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
+ size_t window_align;
off_t len;
-+ repo_config(p->r, packfile_config, &config);
++ repo_config(p->repo, packfile_config, &config);
+ window_align = config.packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(p))
9: c0b386412d ! 9: bb15a0be56 midx: add repository to `multi_pack_index` struct
@@ midx.c: static struct multi_pack_index *load_multi_pack_index_one(const char *ob
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
-+ m->r = the_repository;
++ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
@@ midx.h: struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
-+ struct repository *r;
++ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
@@ pack-bitmap.c: static uint32_t bitmap_num_objects(struct bitmap_index *index)
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
-+ return bitmap_git->midx->r;
-+ return bitmap_git->pack->r;
++ return bitmap_git->midx->repo;
++ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
@@ pack-bitmap.c: struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
-+ struct repository *r = midx->r;
++ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ pack-bitmap.c: struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
-- if (use_boundary_traversal) {
+ if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
-+ if (use_boundary_traversal)
-+ {
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
-- } else {
-- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
-+ }
-+ else
-+ {
+ } else {
+- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v4 1/9] packfile: add repository to struct `packed_git`
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (8 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and also that we currently use 'the_repository' anyway, we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 510332ab04..7e5be05207 100644
--- a/http.c
+++ b/http.c
@@ -2437,7 +2437,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..538f2c60cb 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packed file belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 2/9] packfile: use `repository` from `packed_git` directly
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
` (7 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 3/9] packfile: pass `repository` to static function in the file
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (6 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
Some of the static functions in `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 4/9] packfile: pass down repository to `odb_pack_name`
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (2 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (5 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 7e5be05207..50d8811cea 100644
--- a/http.c
+++ b/http.c
@@ -2579,7 +2579,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 5/9] packfile: pass down repository to `has_object[_kept]_pack`
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (3 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (4 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 6/9] packfile: pass down repository to `for_each_packed_object`
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (4 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (3 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index 538f2c60cb..bcfae2e1bf 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (5 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 9:39 ` [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (2 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`,
which we will complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 5 ++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..9a10eb58bc 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ unsigned long delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -205,6 +207,7 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
@@ -416,7 +419,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..2ae35dd03f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1496,7 +1498,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1878,7 +1885,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (6 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-11-01 17:45 ` Jeff King
2024-10-31 9:39 ` [PATCH v4 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
2024-10-31 20:05 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Taylor Blau
9 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used
in this file, let's change them from being global config variables to
local variables for the subsystem.
We do this by introducing a new local `packfile_config` struct in
`packfile.c` and also adding the required function to parse the said
config. We then use this within `packfile.c` to obtain the variables.
With this, we rid `packfile.c` of all global variable usage, which
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 +--
config.c | 17 -------------
environment.c | 2 --
packfile.c | 57 +++++++++++++++++++++++++++++++++++++------
packfile.h | 2 +-
5 files changed, 52 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 2ae35dd03f..f626d38071 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -27,6 +26,17 @@
#include "config.h"
#include "pack-objects.h"
+struct packfile_config {
+ unsigned long packed_git_window_size;
+ unsigned long packed_git_limit;
+};
+
+#define PACKFILE_CONFIG_INIT \
+{ \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
+}
+
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
{
@@ -48,15 +58,41 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+static int packfile_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
+{
+ struct packfile_config *config = cb;
+
+ if (!strcmp(var, "core.packedgitwindowsize")) {
+ int pgsz_x2 = getpagesize() * 2;
+ config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
+
+ /* This value must be multiple of (pagesize * 2) */
+ config->packed_git_window_size /= pgsz_x2;
+ if (config->packed_git_window_size < 1)
+ config->packed_git_window_size = 1;
+ config->packed_git_window_size *= pgsz_x2;
+ return 0;
+ } else if (!strcmp(var, "core.packedgitlimit")) {
+ config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
+ return 0;
+ } else {
+ return git_default_config(var, value, ctx, cb);
+ }
+}
+
+void pack_report(struct repository *repo)
{
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ repo_config(repo, packfile_config, &config);
+
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(config.packed_git_window_size),
+ sz_fmt(config.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -652,20 +688,25 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ size_t window_align;
off_t len;
+ repo_config(p->repo, packfile_config, &config);
+ window_align = config.packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > config.packed_git_window_size)
+ len = config.packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (config.packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-10-31 9:39 ` [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-01 17:45 ` Jeff King
2024-11-01 19:00 ` Taylor Blau
2024-11-04 9:35 ` karthik nayak
0 siblings, 2 replies; 184+ messages in thread
From: Jeff King @ 2024-11-01 17:45 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me
On Thu, Oct 31, 2024 at 10:39:51AM +0100, Karthik Nayak wrote:
> @@ -652,20 +688,25 @@ unsigned char *use_pack(struct packed_git *p,
> break;
> }
> if (!win) {
> - size_t window_align = packed_git_window_size / 2;
> + struct packfile_config config = PACKFILE_CONFIG_INIT;
> + size_t window_align;
> off_t len;
>
> + repo_config(p->repo, packfile_config, &config);
> + window_align = config.packed_git_window_size / 2;
> +
Parsing config like this is somewhat expensive (remember we're going to
hit your callback for every single config key in the system, user, and
repo-level config files).
And use_pack() is a relatively hot code path, as we call it any time we
need to access bytes from a mapped pack! This "!win" conditional isn't
quite as hot, as it only triggers when we establish a new window for a
pack. But that still happens at least once per pack, more if we need to
move the window around in a big pack, and lots more if we are under
memory pressure and need to open/close windows a lot.
I think we need to parse these values once and then store them somewhere
with cheaper access. Can we grab them in prepare_repo_settings(), for
example, which would cache them? We need a repo struct, but we have one
(the same packed_git->repo you are using to call repo_config()).
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-01 17:45 ` Jeff King
@ 2024-11-01 19:00 ` Taylor Blau
2024-11-04 9:35 ` karthik nayak
1 sibling, 0 replies; 184+ messages in thread
From: Taylor Blau @ 2024-11-01 19:00 UTC (permalink / raw)
To: Jeff King; +Cc: Karthik Nayak, git
On Fri, Nov 01, 2024 at 01:45:47PM -0400, Jeff King wrote:
> On Thu, Oct 31, 2024 at 10:39:51AM +0100, Karthik Nayak wrote:
>
> > @@ -652,20 +688,25 @@ unsigned char *use_pack(struct packed_git *p,
> > break;
> > }
> > if (!win) {
> > - size_t window_align = packed_git_window_size / 2;
> > + struct packfile_config config = PACKFILE_CONFIG_INIT;
> > + size_t window_align;
> > off_t len;
> >
> > + repo_config(p->repo, packfile_config, &config);
> > + window_align = config.packed_git_window_size / 2;
> > +
>
> Parsing config like this is somewhat expensive (remember we're going to
> hit your callback for every single config key in the system, user, and
> repo-level config files).
>
> And use_pack() is a relatively hot code path, as we call it any time we
> need to access bytes from a mapped pack! This "!win" conditional isn't
> quite as hot, as it only triggers when we establish a new window for a
> pack. But that still happens at least once per pack, more if we need to
> move the window around in a big pack, and lots more if we are under
> memory pressure and need to open/close windows a lot.
>
> I think we need to parse these values once and then store them somewhere
> with cheaper access. Can we grab them in prepare_repo_settings(), for
> example, which would cache them? We need a repo struct, but we have one
> (the same packed_git->repo you are using to call repo_config()).
Oh, wow, I can't believe that I missed this in my earlier reviews. Yes,
we should definitely *not* be calling an expensive function which
computes the same value every time in a hot path like 'use_pack()'.
Thanks for spotting.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-01 17:45 ` Jeff King
2024-11-01 19:00 ` Taylor Blau
@ 2024-11-04 9:35 ` karthik nayak
1 sibling, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-04 9:35 UTC (permalink / raw)
To: Jeff King; +Cc: git, me
[-- Attachment #1: Type: text/plain, Size: 1783 bytes --]
Jeff King <peff@peff.net> writes:
> On Thu, Oct 31, 2024 at 10:39:51AM +0100, Karthik Nayak wrote:
>
>> @@ -652,20 +688,25 @@ unsigned char *use_pack(struct packed_git *p,
>> break;
>> }
>> if (!win) {
>> - size_t window_align = packed_git_window_size / 2;
>> + struct packfile_config config = PACKFILE_CONFIG_INIT;
>> + size_t window_align;
>> off_t len;
>>
>> + repo_config(p->repo, packfile_config, &config);
>> + window_align = config.packed_git_window_size / 2;
>> +
>
> Parsing config like this is somewhat expensive (remember we're going to
> hit your callback for every single config key in the system, user, and
> repo-level config files).
>
> And use_pack() is a relatively hot code path, as we call it any time we
> need to access bytes from a mapped pack! This "!win" conditional isn't
> quite as hot, as it only triggers when we establish a new window for a
> pack. But that still happens at least once per pack, more if we need to
> move the window around in a big pack, and lots more if we are under
> memory pressure and need to open/close windows a lot.
>
I must admit, I'm not too aware of the pack objects code base, but that
was my assumption indeed, that this conditional wasn't the hot path. But
even once per pack seems like quite the regression then.
> I think we need to parse these values once and then store them somewhere
> with cheaper access. Can we grab them in prepare_repo_settings(), for
> example, which would cache them? We need a repo struct, but we have one
> (the same packed_git->repo you are using to call repo_config()).
>
> -Peff
This seems like a good idea, I will amend this commit to move the config
to `repo_settings`. I think the previous commit doesn't require any
changes and can stay.
Thanks
- Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v4 9/9] midx: add repository to `multi_pack_index` struct
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (7 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-10-31 9:39 ` Karthik Nayak
2024-10-31 20:05 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Taylor Blau
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-10-31 9:39 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (8 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-10-31 20:05 ` Taylor Blau
2024-11-01 14:36 ` Taylor Blau
9 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-10-31 20:05 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff
Hi Karthik,
On Thu, Oct 31, 2024 at 10:39:43AM +0100, Karthik Nayak wrote:
> Range-diff against v3:
Skimming the range-diff, this new version looks good to me. It would be
nice to hear from another reviewer or two before we start merging it
down, but I think that this is looking good to me.
Thanks for working on this!
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-31 20:05 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Taylor Blau
@ 2024-11-01 14:36 ` Taylor Blau
2024-11-01 16:07 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-11-01 14:36 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff
On Thu, Oct 31, 2024 at 04:05:56PM -0400, Taylor Blau wrote:
> Hi Karthik,
>
> On Thu, Oct 31, 2024 at 10:39:43AM +0100, Karthik Nayak wrote:
> > Range-diff against v3:
>
> Skimming the range-diff, this new version looks good to me. It would be
> nice to hear from another reviewer or two before we start merging it
> down, but I think that this is looking good to me.
Hmmph. I spoke too soon, this new version appears to break CI on
Windows, and thus broke the builds of 'jch' (and 'seen', by extension).
https://github.com/ttaylorr/git/actions/runs/11602969593/job/32309061019
Can you have a look?
In the meantime, I'm going to move this out of 'jch' to let CI run there
again.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-01 14:36 ` Taylor Blau
@ 2024-11-01 16:07 ` karthik nayak
2024-11-01 17:29 ` Jeff King
0 siblings, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-11-01 16:07 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, peff
[-- Attachment #1: Type: text/plain, Size: 1343 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Thu, Oct 31, 2024 at 04:05:56PM -0400, Taylor Blau wrote:
>> Hi Karthik,
>>
>> On Thu, Oct 31, 2024 at 10:39:43AM +0100, Karthik Nayak wrote:
>> > Range-diff against v3:
>>
>> Skimming the range-diff, this new version looks good to me. It would be
>> nice to hear from another reviewer or two before we start merging it
>> down, but I think that this is looking good to me.
>
> Hmmph. I spoke too soon, this new version appears to break CI on
> Windows, and thus broke the builds of 'jch' (and 'seen', by extension).
>
> https://github.com/ttaylorr/git/actions/runs/11602969593/job/32309061019
>
> Can you have a look?
>
> In the meantime, I'm going to move this out of 'jch' to let CI run there
> again.
>
Thanks for letting me know, I think the fix is simply
diff --git a/packfile.c b/packfile.c
index f626d38071..737cd60377 100644
--- a/packfile.c
+++ b/packfile.c
@@ -27,8 +27,8 @@
#include "pack-objects.h"
struct packfile_config {
- unsigned long packed_git_window_size;
- unsigned long packed_git_limit;
+ unsigned long long packed_git_window_size;
+ unsigned long long packed_git_limit;
};
#define PACKFILE_CONFIG_INIT \
Tested it on GitLab's CI too this time.
https://gitlab.com/gitlab-org/git/-/jobs/8248707713
Will send in a new version including the fix tomorrow!
Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-01 16:07 ` karthik nayak
@ 2024-11-01 17:29 ` Jeff King
2024-11-04 9:39 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Jeff King @ 2024-11-01 17:29 UTC (permalink / raw)
To: karthik nayak; +Cc: Taylor Blau, git
On Fri, Nov 01, 2024 at 11:07:48AM -0500, karthik nayak wrote:
> Thanks for letting me know, I think the fix is simply
>
> diff --git a/packfile.c b/packfile.c
> index f626d38071..737cd60377 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -27,8 +27,8 @@
> #include "pack-objects.h"
>
> struct packfile_config {
> - unsigned long packed_git_window_size;
> - unsigned long packed_git_limit;
> + unsigned long long packed_git_window_size;
> + unsigned long long packed_git_limit;
> };
>
> #define PACKFILE_CONFIG_INIT \
Wait, why did these change from "size_t" to "unsigned long" in your
series in the first place? If the goal is moving them into a struct,
they should otherwise retain the same types, no?
Two asides, one for your series and one #leftoverbits:
1. Since these are now fields of packfile_config, do they need the
long packed_git_ prefix anymore? Just window_size and limit would
be nicer, I'd think.
2. I can imagine you might have used "unsigned long" because they are
parsed with git_config_ulong(). That is OK on Linux, where size_t
and "unsigned long" are the same size (either 32- or 64-bits). But
on Windows I think it means that you cannot configure a window
larger than 4GB on a 64-bit system. Or ironically you cannot set a
total limit larger than 4GB, even though the default is 32TB. ;)
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-01 17:29 ` Jeff King
@ 2024-11-04 9:39 ` karthik nayak
2024-11-04 17:27 ` Jeff King
0 siblings, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-11-04 9:39 UTC (permalink / raw)
To: Jeff King; +Cc: Taylor Blau, git
[-- Attachment #1: Type: text/plain, Size: 1726 bytes --]
Jeff King <peff@peff.net> writes:
> On Fri, Nov 01, 2024 at 11:07:48AM -0500, karthik nayak wrote:
>
>> Thanks for letting me know, I think the fix is simply
>>
>> diff --git a/packfile.c b/packfile.c
>> index f626d38071..737cd60377 100644
>> --- a/packfile.c
>> +++ b/packfile.c
>> @@ -27,8 +27,8 @@
>> #include "pack-objects.h"
>>
>> struct packfile_config {
>> - unsigned long packed_git_window_size;
>> - unsigned long packed_git_limit;
>> + unsigned long long packed_git_window_size;
>> + unsigned long long packed_git_limit;
>> };
>>
>> #define PACKFILE_CONFIG_INIT \
>
> Wait, why did these change from "size_t" to "unsigned long" in your
> series in the first place? If the goal is moving them into a struct,
> they should otherwise retain the same types, no?
>
> Two asides, one for your series and one #leftoverbits:
>
> 1. Since these are now fields of packfile_config, do they need the
> long packed_git_ prefix anymore? Just window_size and limit would
> be nicer, I'd think.
>
Moving them to repo_settings means we'd have to keep the long names,
otherwise, I agree the shorter names would be better.
> 2. I can imagine you might have used "unsigned long" because they are
> parsed with git_config_ulong(). That is OK on Linux, where size_t
> and "unsigned long" are the same size (either 32- or 64-bits). But
> on Windows I think it means that you cannot configure a window
> larger than 4GB on a 64-bit system. Or ironically you cannot set a
> total limit larger than 4GB, even though the default is 32TB. ;)
Yup that's the reason I changed them. TIL about size_t and how it works.
Thanks, I'll change the types accordingly and push a new version soon.
>
> -Peff
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-04 9:39 ` karthik nayak
@ 2024-11-04 17:27 ` Jeff King
0 siblings, 0 replies; 184+ messages in thread
From: Jeff King @ 2024-11-04 17:27 UTC (permalink / raw)
To: karthik nayak; +Cc: Taylor Blau, git
On Mon, Nov 04, 2024 at 01:39:31AM -0800, karthik nayak wrote:
> > 2. I can imagine you might have used "unsigned long" because they are
> > parsed with git_config_ulong(). That is OK on Linux, where size_t
> > and "unsigned long" are the same size (either 32- or 64-bits). But
> > on Windows I think it means that you cannot configure a window
> > larger than 4GB on a 64-bit system. Or ironically you cannot set a
> > total limit larger than 4GB, even though the default is 32TB. ;)
>
> Yup that's the reason I changed them. TIL about size_t and how it works.
> Thanks, I'll change the types accordingly and push a new version soon.
Thanks. I think I got so busy talking about the issue that I forgot to
mention why I think this is potential #leftoverbits: we probably ought
to be parsing that config with git_config_ssize_t() or similar. But that
is outside the scope of your series.
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v5 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (23 preceding siblings ...)
2024-10-31 9:39 ` [PATCH v4 0/9] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
` (9 more replies)
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (4 subsequent siblings)
29 siblings, 10 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all uses of it by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`: we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the last two patches deal with global config values. These values are
localized.
For v5, I've rebased the series off the new master: 8f8d6eee53 (The seventh
batch, 2024-11-01), as a dependency for this series 'jk/dumb-http-finalize' was
merged to master. I've found no conflicts while merging with seen & next. But
since this series does touch multiple files, there could be future conflicts.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++---
builtin/gc.c | 5 +-
builtin/index-pack.c | 20 ++++--
builtin/pack-objects.c | 11 +--
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 ------
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 ++-
pack-bitmap.c | 90 ++++++++++++++----------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 144 ++++++++++++++++++++++-----------------
packfile.h | 18 +++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
repo-settings.c | 14 ++++
repo-settings.h | 5 ++
revision.c | 13 ++--
tag.c | 2 +-
36 files changed, 261 insertions(+), 190 deletions(-)
Range-diff against v4:
1: b3d518e998 = 1: 6c00e25c86 packfile: add repository to struct `packed_git`
2: bb9d9aa744 = 2: 70fc8a79af packfile: use `repository` from `packed_git` directly
3: d5df50fa36 = 3: 167a1f3a11 packfile: pass `repository` to static function in the file
4: 0107801c3b = 4: b7cfe78217 packfile: pass down repository to `odb_pack_name`
5: 2d7608a367 = 5: 5566f5554c packfile: pass down repository to `has_object[_kept]_pack`
6: 2c84026d02 = 6: 1b26e45a9b packfile: pass down repository to `for_each_packed_object`
7: 84b89c8a0e ! 7: 7654bf5e7e config: make `delta_base_cache_limit` a non-global variable
@@ Commit message
this value from the repository config, since the value is only used once
in the entire subsystem.
+ The type of the value is changed from `size_t` to an `unsigned long`
+ since the default value is small enough to fit inside the latter on all
+ platforms.
+
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
8: 5bbdc7124d ! 8: 2730aacd8e config: make `packed_git_(limit|window_size)` non-global variables
@@ packfile.c
#include "git-compat-util.h"
#include "environment.h"
-@@
- #include "config.h"
- #include "pack-objects.h"
-
-+struct packfile_config {
-+ unsigned long packed_git_window_size;
-+ unsigned long packed_git_limit;
-+};
-+
-+#define PACKFILE_CONFIG_INIT \
-+{ \
-+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
-+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
-+}
-+
- char *odb_pack_name(struct repository *r, struct strbuf *buf,
- const unsigned char *hash, const char *ext)
- {
@@ packfile.c: static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
-+static int packfile_config(const char *var, const char *value,
-+ const struct config_context *ctx, void *cb)
-+{
-+ struct packfile_config *config = cb;
-+
-+ if (!strcmp(var, "core.packedgitwindowsize")) {
-+ int pgsz_x2 = getpagesize() * 2;
-+ config->packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-+
-+ /* This value must be multiple of (pagesize * 2) */
-+ config->packed_git_window_size /= pgsz_x2;
-+ if (config->packed_git_window_size < 1)
-+ config->packed_git_window_size = 1;
-+ config->packed_git_window_size *= pgsz_x2;
-+ return 0;
-+ } else if (!strcmp(var, "core.packedgitlimit")) {
-+ config->packed_git_limit = git_config_ulong(var, value, ctx->kvi);
-+ return 0;
-+ } else {
-+ return git_default_config(var, value, ctx, cb);
-+ }
-+}
-+
+void pack_report(struct repository *repo)
{
-+ struct packfile_config config = PACKFILE_CONFIG_INIT;
-+ repo_config(repo, packfile_config, &config);
-+
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
@@ packfile.c: static size_t pack_mapped;
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
-+ sz_fmt(config.packed_git_window_size),
-+ sz_fmt(config.packed_git_limit));
++ sz_fmt(repo->settings.packed_git_window_size),
++ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
-+ struct packfile_config config = PACKFILE_CONFIG_INIT;
+ size_t window_align;
off_t len;
-
-+ repo_config(p->repo, packfile_config, &config);
-+ window_align = config.packed_git_window_size / 2;
++ struct repo_settings *settings = &p->repo->settings;
+
++ window_align = settings->packed_git_window_size / 2;
+
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
-
+@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
-+ if (len > config.packed_git_window_size)
-+ len = config.packed_git_window_size;
++ if (len > settings->packed_git_window_size)
++ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
-+ while (config.packed_git_limit < pack_mapped
++ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
@@ packfile.h: unsigned long repo_approximate_object_count(struct repository *r);
/*
* mmap the index file for the specified packfile (if it is not
+
+ ## repo-settings.c ##
+@@ repo-settings.c: void prepare_repo_settings(struct repository *r)
+ const char *strval;
+ int manyfiles;
+ int read_changed_paths;
++ unsigned long longval;
+
+ if (!r->gitdir)
+ BUG("Cannot add settings for uninitialized repository");
+@@ repo-settings.c: void prepare_repo_settings(struct repository *r)
+ * removed.
+ */
+ r->settings.command_requires_full_index = 1;
++
++ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
++ int pgsz_x2 = getpagesize() * 2;
++
++ /* This value must be multiple of (pagesize * 2) */
++ longval /= pgsz_x2;
++ if (longval < 1)
++ longval = 1;
++ r->settings.packed_git_window_size = longval * pgsz_x2;
++ }
++
++ if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
++ r->settings.packed_git_limit = longval;
+ }
+
+ enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
+
+ ## repo-settings.h ##
+@@ repo-settings.h: struct repo_settings {
+
+ int core_multi_pack_index;
+ int warn_ambiguous_refs; /* lazily loaded via accessor */
++
++ size_t packed_git_window_size;
++ size_t packed_git_limit;
+ };
+ #define REPO_SETTINGS_INIT { \
+ .index_version = -1, \
+ .core_untracked_cache = UNTRACKED_CACHE_KEEP, \
+ .fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
+ .warn_ambiguous_refs = -1, \
++ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
++ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
+ }
+
+ void prepare_repo_settings(struct repository *r);
9: bb15a0be56 = 9: 8e33d40077 midx: add repository to `multi_pack_index` struct
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v5 1/9] packfile: add repository to struct `packed_git`
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (8 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and also that we currently use 'the_repository' anyway, we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 58242b9d2d..6744e18409 100644
--- a/http.c
+++ b/http.c
@@ -2439,7 +2439,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..538f2c60cb 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packed file belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 2/9] packfile: use `repository` from `packed_git` directly
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
` (7 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 3/9] packfile: pass `repository` to static function in the file
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (6 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
Some of the static functions in the `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 4/9] packfile: pass down repository to `odb_pack_name`
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (2 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (5 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 6744e18409..420f1566f0 100644
--- a/http.c
+++ b/http.c
@@ -2581,7 +2581,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 5/9] packfile: pass down repository to `has_object[_kept]_pack`
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (3 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (4 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 6/9] packfile: pass down repository to `for_each_packed_object`
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (4 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (3 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index 538f2c60cb..bcfae2e1bf 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (5 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 11:41 ` [PATCH v5 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (2 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
The type of the value is changed from `size_t` to an `unsigned long`
since the default value is small enough to fit inside the latter on all
platforms.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 5 ++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..9a10eb58bc 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ unsigned long delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -205,6 +207,7 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
@@ -416,7 +419,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..2ae35dd03f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1496,7 +1498,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1878,7 +1885,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v5 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (6 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 17:38 ` Jeff King
2024-11-04 11:41 ` [PATCH v5 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
2024-11-04 17:32 ` [PATCH v5 0/9] packfile: avoid using the 'the_repository' global variable Jeff King
9 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used in
this file, let's change them from being global config variables to
local variables for the subsystem.
We do this by adding the two values as fields of the `repo_settings`
struct and parsing the corresponding config in `prepare_repo_settings()`.
We then use these settings within `packfile.c` to obtain the values.
With this, we rid `packfile.c` of all global variable usage, which
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
config.c | 17 -----------------
environment.c | 2 --
packfile.c | 19 +++++++++++--------
packfile.h | 2 +-
repo-settings.c | 14 ++++++++++++++
repo-settings.h | 5 +++++
7 files changed, 33 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 2ae35dd03f..e1b04a2a6a 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -48,15 +47,15 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+void pack_report(struct repository *repo)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(repo->settings.packed_git_window_size),
+ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -652,8 +651,11 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
+ struct repo_settings *settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
@@ -661,11 +663,12 @@ unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > settings->packed_git_window_size)
+ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
diff --git a/repo-settings.c b/repo-settings.c
index 4699b4b365..0d875fdd86 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -26,6 +26,7 @@ void prepare_repo_settings(struct repository *r)
const char *strval;
int manyfiles;
int read_changed_paths;
+ unsigned long longval;
if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository");
@@ -123,6 +124,19 @@ void prepare_repo_settings(struct repository *r)
* removed.
*/
r->settings.command_requires_full_index = 1;
+
+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
+ longval /= pgsz_x2;
+ if (longval < 1)
+ longval = 1;
+ r->settings.packed_git_window_size = longval * pgsz_x2;
+ }
+
+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
+ r->settings.packed_git_limit = longval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 51d6156a11..b22d6438e2 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -57,12 +57,17 @@ struct repo_settings {
int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
+
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
.core_untracked_cache = UNTRACKED_CACHE_KEEP, \
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v5 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-04 11:41 ` [PATCH v5 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-04 17:38 ` Jeff King
2024-11-05 9:50 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Jeff King @ 2024-11-04 17:38 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me
On Mon, Nov 04, 2024 at 12:41:46PM +0100, Karthik Nayak wrote:
> @@ -652,8 +651,11 @@ unsigned char *use_pack(struct packed_git *p,
> break;
> }
> if (!win) {
> - size_t window_align = packed_git_window_size / 2;
> + size_t window_align;
> off_t len;
> + struct repo_settings *settings = &p->repo->settings;
> +
> + window_align = settings->packed_git_window_size / 2;
>
> if (p->pack_fd == -1 && open_packed_git(p))
> die("packfile %s cannot be accessed", p->pack_name);
> @@ -661,11 +663,12 @@ unsigned char *use_pack(struct packed_git *p,
> CALLOC_ARRAY(win, 1);
> win->offset = (offset / window_align) * window_align;
> len = p->pack_size - win->offset;
> - if (len > packed_git_window_size)
> - len = packed_git_window_size;
> + if (len > settings->packed_git_window_size)
> + len = settings->packed_git_window_size;
> win->len = (size_t)len;
> pack_mapped += win->len;
> - while (packed_git_limit < pack_mapped
> +
> + while (settings->packed_git_limit < pack_mapped
> && unuse_one_window(p))
> ; /* nothing */
Much nicer than the earlier version of the patch.
Do we need to call prepare_repo_settings() here? It looks like the
intent is that it would be lazy-loaded, and I don't think there's any
guarantee that somebody else would have done so.
> @@ -123,6 +124,19 @@ void prepare_repo_settings(struct repository *r)
> * removed.
> */
> r->settings.command_requires_full_index = 1;
> +
> + if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
> + int pgsz_x2 = getpagesize() * 2;
> +
> + /* This value must be multiple of (pagesize * 2) */
> + longval /= pgsz_x2;
> + if (longval < 1)
> + longval = 1;
> + r->settings.packed_git_window_size = longval * pgsz_x2;
> + }
> +
> + if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
> + r->settings.packed_git_limit = longval;
And this looks like a faithful conversion of the existing parsing. Since
we're switching from git_config_ulong() to repo_config_get_ulong(), we
could take the opportunity to swap out for the size_t parser, but:
1. I'm just as happy for that to happen separately, and leave this as
a patch which should not have any behavior change.
2. It looks like we do not yet have a size_t variant for the configset
accessors. :)
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v5 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-04 17:38 ` Jeff King
@ 2024-11-05 9:50 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-05 9:50 UTC (permalink / raw)
To: Jeff King; +Cc: git, me
[-- Attachment #1: Type: text/plain, Size: 2639 bytes --]
Jeff King <peff@peff.net> writes:
> On Mon, Nov 04, 2024 at 12:41:46PM +0100, Karthik Nayak wrote:
>
>> @@ -652,8 +651,11 @@ unsigned char *use_pack(struct packed_git *p,
>> break;
>> }
>> if (!win) {
>> - size_t window_align = packed_git_window_size / 2;
>> + size_t window_align;
>> off_t len;
>> + struct repo_settings *settings = &p->repo->settings;
>> +
>> + window_align = settings->packed_git_window_size / 2;
>>
>> if (p->pack_fd == -1 && open_packed_git(p))
>> die("packfile %s cannot be accessed", p->pack_name);
>> @@ -661,11 +663,12 @@ unsigned char *use_pack(struct packed_git *p,
>> CALLOC_ARRAY(win, 1);
>> win->offset = (offset / window_align) * window_align;
>> len = p->pack_size - win->offset;
>> - if (len > packed_git_window_size)
>> - len = packed_git_window_size;
>> + if (len > settings->packed_git_window_size)
>> + len = settings->packed_git_window_size;
>> win->len = (size_t)len;
>> pack_mapped += win->len;
>> - while (packed_git_limit < pack_mapped
>> +
>> + while (settings->packed_git_limit < pack_mapped
>> && unuse_one_window(p))
>> ; /* nothing */
>
> Much nicer than the earlier version of the patch.
>
> Do we need to call prepare_repo_settings() here? It looks like the
> intent is that it would be lazy-loaded, and I don't think there's any
> guarantee that somebody else would have done so.
>
I think it would be safer to do that, than to rely on the tests like I
did. I'll change that.
>> @@ -123,6 +124,19 @@ void prepare_repo_settings(struct repository *r)
>> * removed.
>> */
>> r->settings.command_requires_full_index = 1;
>> +
>> + if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
>> + int pgsz_x2 = getpagesize() * 2;
>> +
>> + /* This value must be multiple of (pagesize * 2) */
>> + longval /= pgsz_x2;
>> + if (longval < 1)
>> + longval = 1;
>> + r->settings.packed_git_window_size = longval * pgsz_x2;
>> + }
>> +
>> + if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
>> + r->settings.packed_git_limit = longval;
>
> And this looks like a faithful conversion of the existing parsing. Since
> we're switching from git_config_ulong() to repo_config_get_ulong(), we
> could take the opportunity to swap out for the size_t parser, but:
>
> 1. I'm just as happy for that to happen separately, and leave this as
> a patch which should not have any behavior change.
>
> 2. It looks like we do not yet have a size_t variant for the configset
> accessors. :)
>
> -Peff
Yes, indeed. I'll leave it out of this. I'll follow up if I can! Thanks
Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v5 9/9] midx: add repository to `multi_pack_index` struct
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (7 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-04 11:41 ` Karthik Nayak
2024-11-04 17:32 ` [PATCH v5 0/9] packfile: avoid using the 'the_repository' global variable Jeff King
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-04 11:41 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v5 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
` (8 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-11-04 17:32 ` Jeff King
2024-11-05 9:43 ` karthik nayak
9 siblings, 1 reply; 184+ messages in thread
From: Jeff King @ 2024-11-04 17:32 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me
On Mon, Nov 04, 2024 at 12:41:38PM +0100, Karthik Nayak wrote:
> Range-diff against v4:
> 1: b3d518e998 = 1: 6c00e25c86 packfile: add repository to struct `packed_git`
> 2: bb9d9aa744 = 2: 70fc8a79af packfile: use `repository` from `packed_git` directly
> 3: d5df50fa36 = 3: 167a1f3a11 packfile: pass `repository` to static function in the file
> 4: 0107801c3b = 4: b7cfe78217 packfile: pass down repository to `odb_pack_name`
> 5: 2d7608a367 = 5: 5566f5554c packfile: pass down repository to `has_object[_kept]_pack`
> 6: 2c84026d02 = 6: 1b26e45a9b packfile: pass down repository to `for_each_packed_object`
> 7: 84b89c8a0e ! 7: 7654bf5e7e config: make `delta_base_cache_limit` a non-global variable
> @@ Commit message
> this value from the repository config, since the value is only used once
> in the entire subsystem.
>
> + The type of the value is changed from `size_t` to an `unsigned long`
> + since the default value is small enough to fit inside the latter on all
> + platforms.
> +
I think this change is not ideal, for the same reason that the other
type changes were: you can conceivably have a 4GB or larger cache here.
On Windows using "unsigned long" would prevent that. (On most other
systems it is OK either way since "unsigned long" and "size_t" are
generally the same size).
I do think the config parsing should change to use size_t here (like I
mentioned elsewhere in the thread), which would fix it on Windows.
That's outside the scope of your patch, but in the meantime we should
not be making things worse by moving the variable itself to the inferior
type.
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v5 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-04 17:32 ` [PATCH v5 0/9] packfile: avoid using the 'the_repository' global variable Jeff King
@ 2024-11-05 9:43 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-05 9:43 UTC (permalink / raw)
To: Jeff King; +Cc: git, me
[-- Attachment #1: Type: text/plain, Size: 2270 bytes --]
Jeff King <peff@peff.net> writes:
> On Mon, Nov 04, 2024 at 12:41:38PM +0100, Karthik Nayak wrote:
>
>> Range-diff against v4:
>> 1: b3d518e998 = 1: 6c00e25c86 packfile: add repository to struct `packed_git`
>> 2: bb9d9aa744 = 2: 70fc8a79af packfile: use `repository` from `packed_git` directly
>> 3: d5df50fa36 = 3: 167a1f3a11 packfile: pass `repository` to static function in the file
>> 4: 0107801c3b = 4: b7cfe78217 packfile: pass down repository to `odb_pack_name`
>> 5: 2d7608a367 = 5: 5566f5554c packfile: pass down repository to `has_object[_kept]_pack`
>> 6: 2c84026d02 = 6: 1b26e45a9b packfile: pass down repository to `for_each_packed_object`
>> 7: 84b89c8a0e ! 7: 7654bf5e7e config: make `delta_base_cache_limit` a non-global variable
>> @@ Commit message
>> this value from the repository config, since the value is only used once
>> in the entire subsystem.
>>
>> + The type of the value is changed from `size_t` to an `unsigned long`
>> + since the default value is small enough to fit inside the latter on all
>> + platforms.
>> +
>
> I think this change is not ideal, for the same reason that the other
> type changes were: you can conceivably have a 4GB or larger cache here.
> On Windows using "unsigned long" would prevent that. (On most other
> systems it is OK either way since "unsigned long" and "size_t" are
> generally the same size).
>
> I do think the config parsing should change to use size_t here (like I
> mentioned elsewhere in the thread), which would fix it on Windows.
> That's outside the scope of your patch, but in the meantime we should
> not be making things worse by moving the variable itself to the inferior
> type.
>
There is a subtle difference though. Although both configs (this and the
next commit) are initialized as 'size_t', they are bounded by the range of
'unsigned long', due to the functions used to obtain
the information. The only difference is the defaults, which could've
been greater than the range of 'unsigned long'.
That said, keeping it 'size_t' makes it easier to know
which configs need to use the yet-to-be-introduced 'size_t' variants of
the config parsers. So let me change this too. Thanks.
Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v6 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (24 preceding siblings ...)
2024-11-04 11:41 ` [PATCH v5 " Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
` (9 more replies)
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (3 subsequent siblings)
29 siblings, 10 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all use cases of this, by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`; we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the last two patches deal with global config values. These values are
localized.
For v5/6 onwards, I've rebased the series off the new master: 8f8d6eee53 (The
seventh batch, 2024-11-01), as a dependency for this series 'jk/dumb-http-finalize'
was merged to master. I've found no conflicts while merging with seen & next. But
since this series does touch multiple files, there could be future conflicts.
Changes in v6:
- Lazy load repository settings in packfile.c. This ensures that the settings are
available for sure and we do not rely on callers setting them.
- Use `size_t` for `delta_base_cache_limit`.
Changes in v5:
- Move packed_git* settings to repo_settings to ensure we don't keep reparsing the
settings in `use_pack`.
Changes in v4:
- Renamed the repository field within `packed_git` and `multi_pack_index` from
`r` to `repo`, while keeping function parameters to be `r`.
- Fixed bad braces.
Changes in v3:
- Improved commit messages. In the first commit to talk about how packed_git
struct could also be part of the alternates of a repository. In the 7th commit
to talk about the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++---
builtin/gc.c | 8 ++-
builtin/index-pack.c | 20 ++++--
builtin/pack-objects.c | 11 +--
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 ------
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 ++-
pack-bitmap.c | 90 +++++++++++++++---------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 148 +++++++++++++++++++++++----------------
packfile.h | 18 +++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
repo-settings.c | 14 ++++
repo-settings.h | 5 ++
revision.c | 13 ++--
tag.c | 2 +-
36 files changed, 268 insertions(+), 190 deletions(-)
Range-diff against v5:
-: ---------- > 1: 6c00e25c86 packfile: add repository to struct `packed_git`
-: ---------- > 2: 70fc8a79af packfile: use `repository` from `packed_git` directly
-: ---------- > 3: 167a1f3a11 packfile: pass `repository` to static function in the file
-: ---------- > 4: b7cfe78217 packfile: pass down repository to `odb_pack_name`
-: ---------- > 5: 5566f5554c packfile: pass down repository to `has_object[_kept]_pack`
-: ---------- > 6: 1b26e45a9b packfile: pass down repository to `for_each_packed_object`
1: 7654bf5e7e ! 7: 89313cfed4 config: make `delta_base_cache_limit` a non-global variable
@@ Commit message
this value from the repository config, since the value is only used once
in the entire subsystem.
- The type of the value is changed from `size_t` to an `unsigned long`
- since the default value is small enough to fit inside the latter on all
- platforms.
-
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
@@ builtin/gc.c: struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
-+ unsigned long delta_base_cache_limit;
++ size_t delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ builtin/gc.c: struct gc_config {
static void gc_config_release(struct gc_config *cfg)
@@ builtin/gc.c: static void gc_config(struct gc_config *cfg)
+ {
+ const char *value;
+ char *owned = NULL;
++ unsigned long longval;
+ if (!git_config_get_value("gc.packrefs", &value)) {
+ if (value && !strcmp(value, "notbare"))
+@@ builtin/gc.c: static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
-+ git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
++ if(!git_config_get_ulong("core.deltabasecachelimit", &longval))
++ cfg->delta_base_cache_limit = longval;
++
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
+ cfg->repack_filter = owned;
@@ builtin/gc.c: static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
2: 2730aacd8e ! 8: 3a8e3b88df config: make `packed_git_(limit|window_size)` non-global variables
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
-+ struct repo_settings *settings = &p->repo->settings;
++ struct repo_settings *settings;
++
++ /* lazy load the settings incase it hasn't been setup */
++ prepare_repo_settings(p->repo);
++ settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
3: 8e33d40077 = 9: 2f9a146978 midx: add repository to `multi_pack_index` struct
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v6 1/9] packfile: add repository to struct `packed_git`
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (8 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and also that we currently use 'the_repository' anyway, we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 58242b9d2d..6744e18409 100644
--- a/http.c
+++ b/http.c
@@ -2439,7 +2439,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..538f2c60cb 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo dentoes the repository this packed file belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 2/9] packfile: use `repository` from `packed_git` directly
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
` (7 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 3/9] packfile: pass `repository` to static function in the file
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (6 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
Some of the static functions in `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 4/9] packfile: pass down repository to `odb_pack_name`
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (2 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (5 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 6744e18409..420f1566f0 100644
--- a/http.c
+++ b/http.c
@@ -2581,7 +2581,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 5/9] packfile: pass down repository to `has_object[_kept]_pack`
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (3 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (4 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 6/9] packfile: pass down repository to `for_each_packed_object`
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (4 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (3 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index 538f2c60cb..bcfae2e1bf 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (5 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-07 14:10 ` [PATCH v6 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (2 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 8 +++++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 29 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..ad80c3aed2 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ size_t delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -168,6 +170,7 @@ static void gc_config(struct gc_config *cfg)
{
const char *value;
char *owned = NULL;
+ unsigned long longval;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
@@ -206,6 +209,9 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ if(!git_config_get_ulong("core.deltabasecachelimit", &longval))
+ cfg->delta_base_cache_limit = longval;
+
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
cfg->repack_filter = owned;
@@ -416,7 +422,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..2ae35dd03f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1496,7 +1498,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1878,7 +1885,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v6 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (6 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-08 3:42 ` Junio C Hamano
2024-11-07 14:10 ` [PATCH v6 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
2024-11-08 7:49 ` [PATCH v6 0/9] packfile: avoid using the 'the_repository' global variable Junio C Hamano
9 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used
in this file, let's change them from being global config variables to
local variables for the subsystem.
We do this by introducing a new local `packfile_config` struct in
`packfile.c` and also adding the required function to parse the said
config. We then use this within `packfile.c` to obtain the variables.
With this, we rid `packfile.c` of all global variable usage and this
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
config.c | 17 -----------------
environment.c | 2 --
packfile.c | 23 +++++++++++++++--------
packfile.h | 2 +-
repo-settings.c | 14 ++++++++++++++
repo-settings.h | 5 +++++
7 files changed, 37 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 2ae35dd03f..46f5369173 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -48,15 +47,15 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+void pack_report(struct repository *repo)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(repo->settings.packed_git_window_size),
+ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -652,8 +651,15 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
+ struct repo_settings *settings;
+
+ /* lazy load the settings incase it hasn't been setup */
+ prepare_repo_settings(p->repo);
+ settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
@@ -661,11 +667,12 @@ unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > settings->packed_git_window_size)
+ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
diff --git a/repo-settings.c b/repo-settings.c
index 4699b4b365..0d875fdd86 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -26,6 +26,7 @@ void prepare_repo_settings(struct repository *r)
const char *strval;
int manyfiles;
int read_changed_paths;
+ unsigned long longval;
if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository");
@@ -123,6 +124,19 @@ void prepare_repo_settings(struct repository *r)
* removed.
*/
r->settings.command_requires_full_index = 1;
+
+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
+ longval /= pgsz_x2;
+ if (longval < 1)
+ longval = 1;
+ r->settings.packed_git_window_size = longval * pgsz_x2;
+ }
+
+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
+ r->settings.packed_git_limit = longval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 51d6156a11..b22d6438e2 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -57,12 +57,17 @@ struct repo_settings {
int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
+
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
.core_untracked_cache = UNTRACKED_CACHE_KEEP, \
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v6 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-07 14:10 ` [PATCH v6 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-08 3:42 ` Junio C Hamano
2024-11-08 9:27 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Junio C Hamano @ 2024-11-08 3:42 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me, peff
Karthik Nayak <karthik.188@gmail.com> writes:
> The variables `packed_git_window_size` and `packed_git_limit` are global
> config variables used in the `packfile.c` file. Since it is only used in
> this file, let's change it from being a global config variable to a
> local variable for the subsystem.
>
> We do this by introducing a new local `packfile_config` struct in
> `packfile.c` and also adding the required function to parse the said
> config. We then use this within `packfile.c` to obtain the variables.
This patch has no string "packfile_config" in it, other than the one
in the above string. A stale description?
> if (!win) {
> - size_t window_align = packed_git_window_size / 2;
> + size_t window_align;
> off_t len;
> + struct repo_settings *settings;
> +
> + /* lazy load the settings incase it hasn't been setup */
"incase" -> "in case"?
> + prepare_repo_settings(p->repo);
> + settings = &p->repo->settings;
This change is curious. How can p->repo be uninitialized? p is a
packed-git list created in some repository, surely it should already
be initialized, no?
> +
> + window_align = settings->packed_git_window_size / 2;
> if (p->pack_fd == -1 && open_packed_git(p))
> die("packfile %s cannot be accessed", p->pack_name);
> @@ -661,11 +667,12 @@ unsigned char *use_pack(struct packed_git *p,
> CALLOC_ARRAY(win, 1);
> win->offset = (offset / window_align) * window_align;
> len = p->pack_size - win->offset;
> - if (len > packed_git_window_size)
> - len = packed_git_window_size;
> + if (len > settings->packed_git_window_size)
> + len = settings->packed_git_window_size;
> win->len = (size_t)len;
> pack_mapped += win->len;
> - while (packed_git_limit < pack_mapped
> +
> + while (settings->packed_git_limit < pack_mapped
> && unuse_one_window(p))
> ; /* nothing */
> win->base = xmmap_gently(NULL, win->len,
Other than that, the changes to the above block that uses the local
variable "settings" looks good.
Thanks.
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v6 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-08 3:42 ` Junio C Hamano
@ 2024-11-08 9:27 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-08 9:27 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, me, peff
[-- Attachment #1: Type: text/plain, Size: 2361 bytes --]
Junio C Hamano <gitster@pobox.com> writes:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
>> The variables `packed_git_window_size` and `packed_git_limit` are global
>> config variables used in the `packfile.c` file. Since it is only used in
>> this file, let's change it from being a global config variable to a
>> local variable for the subsystem.
>>
>> We do this by introducing a new local `packfile_config` struct in
>> `packfile.c` and also adding the required function to parse the said
>> config. We then use this within `packfile.c` to obtain the variables.
>
> This patch has no string "packfile_config" in it, other than the one
> in the above string. A stale description?
>
Yup indeed.
>> if (!win) {
>> - size_t window_align = packed_git_window_size / 2;
>> + size_t window_align;
>> off_t len;
>> + struct repo_settings *settings;
>> +
>> + /* lazy load the settings incase it hasn't been setup */
>
> "incase" -> "in case"?
>
Will change.
>> + prepare_repo_settings(p->repo);
>> + settings = &p->repo->settings;
>
> This change is curious. How can p->repo be uninitialized? p is a
> packed-git list created in some repository, surely it should already
> be initialized, no?
>
Here `p->repo` itself is expected to be initialized. We're however
trying to initialize `p->repo->settings`. Which might not have been. If
it is, `prepare_repo_settings` will return early.
>
>> +
>> + window_align = settings->packed_git_window_size / 2;
>> if (p->pack_fd == -1 && open_packed_git(p))
>> die("packfile %s cannot be accessed", p->pack_name);
>> @@ -661,11 +667,12 @@ unsigned char *use_pack(struct packed_git *p,
>> CALLOC_ARRAY(win, 1);
>> win->offset = (offset / window_align) * window_align;
>> len = p->pack_size - win->offset;
>> - if (len > packed_git_window_size)
>> - len = packed_git_window_size;
>> + if (len > settings->packed_git_window_size)
>> + len = settings->packed_git_window_size;
>> win->len = (size_t)len;
>> pack_mapped += win->len;
>> - while (packed_git_limit < pack_mapped
>> +
>> + while (settings->packed_git_limit < pack_mapped
>> && unuse_one_window(p))
>> ; /* nothing */
>> win->base = xmmap_gently(NULL, win->len,
>
> Other than that, the changes to the above block that uses the local
> variable "settings" looks good.
>
> Thanks.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v6 9/9] midx: add repository to `multi_pack_index` struct
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (7 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-07 14:10 ` Karthik Nayak
2024-11-08 7:49 ` [PATCH v6 0/9] packfile: avoid using the 'the_repository' global variable Junio C Hamano
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-07 14:10 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v6 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
` (8 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-11-08 7:49 ` Junio C Hamano
2024-11-08 9:28 ` karthik nayak
9 siblings, 1 reply; 184+ messages in thread
From: Junio C Hamano @ 2024-11-08 7:49 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me, peff
Karthik Nayak <karthik.188@gmail.com> writes:
> Changes in v6:
> - Lazy load repository settings in packfile.c. This ensures that the settings are
> available for sure and we do not rely on callees setting it.
> - Use `size_t` for `delta_base_cache_limit`.
I'll trust the reviews made while I was gone and will comment only
on the differences between the last iteration.
> diff --git c/builtin/gc.c w/builtin/gc.c
> index 9a10eb58bc..ad80c3aed2 100644
> --- c/builtin/gc.c
> +++ w/builtin/gc.c
> @@ -138,7 +138,7 @@ struct gc_config {
> char *repack_filter_to;
> unsigned long big_pack_threshold;
> unsigned long max_delta_cache_size;
> - unsigned long delta_base_cache_limit;
> + size_t delta_base_cache_limit;
> };
Makes sense.
> @@ -170,6 +170,7 @@ static void gc_config(struct gc_config *cfg)
> {
> const char *value;
> char *owned = NULL;
> + unsigned long longval;
>
> if (!git_config_get_value("gc.packrefs", &value)) {
> if (value && !strcmp(value, "notbare"))
> @@ -207,7 +208,9 @@ static void gc_config(struct gc_config *cfg)
>
> git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
> git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
> - git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
> +
> + if(!git_config_get_ulong("core.deltabasecachelimit", &longval))
> + cfg->delta_base_cache_limit = longval;
And this is a sensible way to fill size_t member with the value read
into a ulong. Should "longval" be named after "unsigned long" instead
of "long", by the way?
There is a required SP missing inside "if(!".
> diff --git c/packfile.c w/packfile.c
> index e1b04a2a6a..46f5369173 100644
> --- c/packfile.c
> +++ w/packfile.c
> @@ -653,7 +653,11 @@ unsigned char *use_pack(struct packed_git *p,
> if (!win) {
> size_t window_align;
> off_t len;
> - struct repo_settings *settings = &p->repo->settings;
> + struct repo_settings *settings;
> +
> + /* lazy load the settings incase it hasn't been setup */
> + prepare_repo_settings(p->repo);
> + settings = &p->repo->settings;
This is a bit curious. I'll read the individual patch that has this
change before commenting on it.
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v6 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-08 7:49 ` [PATCH v6 0/9] packfile: avoid using the 'the_repository' global variable Junio C Hamano
@ 2024-11-08 9:28 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-08 9:28 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, me, peff
[-- Attachment #1: Type: text/plain, Size: 2379 bytes --]
Junio C Hamano <gitster@pobox.com> writes:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
>> Changes in v6:
>> - Lazy load repository settings in packfile.c. This ensures that the settings are
>> available for sure and we do not rely on callees setting it.
>> - Use `size_t` for `delta_base_cache_limit`.
>
> I'll trust the reviews made while I was gone and will comment only
> on the differences between the last iteration.
>
>> diff --git c/builtin/gc.c w/builtin/gc.c
>> index 9a10eb58bc..ad80c3aed2 100644
>> --- c/builtin/gc.c
>> +++ w/builtin/gc.c
>> @@ -138,7 +138,7 @@ struct gc_config {
>> char *repack_filter_to;
>> unsigned long big_pack_threshold;
>> unsigned long max_delta_cache_size;
>> - unsigned long delta_base_cache_limit;
>> + size_t delta_base_cache_limit;
>> };
>
> Makes sense.
>
>> @@ -170,6 +170,7 @@ static void gc_config(struct gc_config *cfg)
>> {
>> const char *value;
>> char *owned = NULL;
>> + unsigned long longval;
>>
>> if (!git_config_get_value("gc.packrefs", &value)) {
>> if (value && !strcmp(value, "notbare"))
>> @@ -207,7 +208,9 @@ static void gc_config(struct gc_config *cfg)
>>
>> git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
>> git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
>> - git_config_get_ulong("core.deltabasecachelimit", &cfg->delta_base_cache_limit);
>> +
>> + if(!git_config_get_ulong("core.deltabasecachelimit", &longval))
>> + cfg->delta_base_cache_limit = longval;
>
> And this is a sensible way to fill size_t member with the value read
> into a ulong. Should "longval" be named after "unsigned long" instead
> of "long", by the way?
>
> There is a required SP missing inside "if(!".
>
Agreed, will fix both and send in a new version.
>> diff --git c/packfile.c w/packfile.c
>> index e1b04a2a6a..46f5369173 100644
>> --- c/packfile.c
>> +++ w/packfile.c
>> @@ -653,7 +653,11 @@ unsigned char *use_pack(struct packed_git *p,
>> if (!win) {
>> size_t window_align;
>> off_t len;
>> - struct repo_settings *settings = &p->repo->settings;
>> + struct repo_settings *settings;
>> +
>> + /* lazy load the settings incase it hasn't been setup */
>> + prepare_repo_settings(p->repo);
>> + settings = &p->repo->settings;
>
> This is a bit curious. I'll read the individual patch that has this
> change before commenting on it.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v7 0/9] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (25 preceding siblings ...)
2024-11-07 14:10 ` [PATCH v6 " Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
` (10 more replies)
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (2 subsequent siblings)
29 siblings, 11 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all uses of it by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`: we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the last two patches deal with global config values. These values are
localized.
For v5 onwards, I've rebased the series off the new master: 8f8d6eee53 (The
seventh batch, 2024-11-01), as a dependency for this series 'jk/dumb-http-finalize'
was merged to master. I've found no conflicts while merging with seen & next. But
since this series does touch multiple files, there could be future conflicts.
Changes in v7:
- Cleanup stale commit message.
- Add missing space in `if` statement.
- Fix typo s/incase/in case/.
Changes in v6:
- Lazy load repository settings in packfile.c. This ensures that the settings are
available for sure and we do not rely on callees setting it.
- Use `size_t` for `delta_base_cache_limit`.
Changes in v5:
- Move packed_git* settings to repo_settings to ensure we don't keep reparsing the
settings in `use_pack`.
Changes in v4:
- Renamed the repository field within `packed_git` and `multi_pack_index` from
`r` to `repo`, while keeping function parameters to be `r`.
- Fixed bad braces.
Changes in v3:
- Improved commit messages: the first commit now explains how a packed_git
struct could also be part of the alternates of a repository, and the 7th commit
explains the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++---
builtin/gc.c | 8 ++-
builtin/index-pack.c | 20 ++++--
builtin/pack-objects.c | 11 +--
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 ------
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 ++-
pack-bitmap.c | 90 +++++++++++++++---------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 148 +++++++++++++++++++++++----------------
packfile.h | 18 +++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
repo-settings.c | 14 ++++
repo-settings.h | 5 ++
revision.c | 13 ++--
tag.c | 2 +-
36 files changed, 268 insertions(+), 190 deletions(-)
Range-diff against v6:
-: ---------- > 1: 6c00e25c86 packfile: add repository to struct `packed_git`
-: ---------- > 2: 70fc8a79af packfile: use `repository` from `packed_git` directly
-: ---------- > 3: 167a1f3a11 packfile: pass `repository` to static function in the file
-: ---------- > 4: b7cfe78217 packfile: pass down repository to `odb_pack_name`
-: ---------- > 5: 5566f5554c packfile: pass down repository to `has_object[_kept]_pack`
-: ---------- > 6: 1b26e45a9b packfile: pass down repository to `for_each_packed_object`
1: 89313cfed4 ! 7: 1bdc34f4d8 config: make `delta_base_cache_limit` a non-global variable
@@ builtin/gc.c: static void gc_config(struct gc_config *cfg)
{
const char *value;
char *owned = NULL;
-+ unsigned long longval;
++ unsigned long ulongval;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
@@ builtin/gc.c: static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
-+ if(!git_config_get_ulong("core.deltabasecachelimit", &longval))
-+ cfg->delta_base_cache_limit = longval;
++ if (!git_config_get_ulong("core.deltabasecachelimit", &ulongval))
++ cfg->delta_base_cache_limit = ulongval;
+
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
2: 3a8e3b88df ! 8: 7b6baa89ac config: make `packed_git_(limit|window_size)` non-global variables
@@ Commit message
this file, let's change it from being a global config variable to a
local variable for the subsystem.
- We do this by introducing a new local `packfile_config` struct in
- `packfile.c` and also adding the required function to parse the said
- config. We then use this within `packfile.c` to obtain the variables.
-
With this, we rid `packfile.c` from all global variable usage and this
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
@@ packfile.c: unsigned char *use_pack(struct packed_git *p,
off_t len;
+ struct repo_settings *settings;
+
-+ /* lazy load the settings incase it hasn't been setup */
++ /* lazy load the settings in case it hasn't been setup */
+ prepare_repo_settings(p->repo);
+ settings = &p->repo->settings;
+
3: 2f9a146978 = 9: a3667d87ec midx: add repository to `multi_pack_index` struct
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-13 12:41 ` Toon Claes
2024-11-11 11:14 ` [PATCH v7 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (9 subsequent siblings)
10 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and that we currently use 'the_repository' anyway, we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 58242b9d2d..6744e18409 100644
--- a/http.c
+++ b/http.c
@@ -2439,7 +2439,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..538f2c60cb 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo dentoes the repository this packed file belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-11 11:14 ` [PATCH v7 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-13 12:41 ` Toon Claes
2024-11-13 13:04 ` karthik nayak
2024-11-20 22:30 ` Taylor Blau
0 siblings, 2 replies; 184+ messages in thread
From: Toon Claes @ 2024-11-13 12:41 UTC (permalink / raw)
To: Karthik Nayak, karthik.188; +Cc: git, me, peff, gitster
Karthik Nayak <karthik.188@gmail.com> writes:
> [snip]
>
> diff --git a/object-store-ll.h b/object-store-ll.h
> index 53b8e693b1..538f2c60cb 100644
> --- a/object-store-ll.h
> +++ b/object-store-ll.h
> @@ -10,6 +10,7 @@
> struct oidmap;
> struct oidtree;
> struct strbuf;
> +struct repository;
>
> struct object_directory {
> struct object_directory *next;
> @@ -135,6 +136,10 @@ struct packed_git {
> */
> const uint32_t *mtimes_map;
> size_t mtimes_size;
> +
> + /* repo dentoes the repository this packed file belongs to */
Small typo here, I think you mean "denotes".
That's all I've got about all other changes in this patch series.
--
Toon
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-13 12:41 ` Toon Claes
@ 2024-11-13 13:04 ` karthik nayak
2024-11-13 23:56 ` Junio C Hamano
2024-11-20 22:30 ` Taylor Blau
1 sibling, 1 reply; 184+ messages in thread
From: karthik nayak @ 2024-11-13 13:04 UTC (permalink / raw)
To: Toon Claes; +Cc: git, me, peff, gitster
[-- Attachment #1: Type: text/plain, Size: 833 bytes --]
Toon Claes <toon@iotcl.com> writes:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
>> [snip]
>>
>> diff --git a/object-store-ll.h b/object-store-ll.h
>> index 53b8e693b1..538f2c60cb 100644
>> --- a/object-store-ll.h
>> +++ b/object-store-ll.h
>> @@ -10,6 +10,7 @@
>> struct oidmap;
>> struct oidtree;
>> struct strbuf;
>> +struct repository;
>>
>> struct object_directory {
>> struct object_directory *next;
>> @@ -135,6 +136,10 @@ struct packed_git {
>> */
>> const uint32_t *mtimes_map;
>> size_t mtimes_size;
>> +
>> + /* repo dentoes the repository this packed file belongs to */
>
> Small typo here, I think you mean "denotes".
>
Indeed. I'll add it in locally but will avoid a re-roll just for the
typo.
> That's all I've got about all other changes in this patch series.
>
Thanks for the review!
- Karthik
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-13 13:04 ` karthik nayak
@ 2024-11-13 23:56 ` Junio C Hamano
2024-11-14 10:04 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Junio C Hamano @ 2024-11-13 23:56 UTC (permalink / raw)
To: karthik nayak; +Cc: Toon Claes, git, me, peff
karthik nayak <karthik.188@gmail.com> writes:
> Indeed. I'll add it in locally but will avoid a re-roll just for the
> typo.
FWIW, I'd _strongly_ prefer people *not* phrase it that way.
It is very much sensible to refrain sending an another reroll
immediately only to correct a small typo. IOW, "avoid a" -> "avoid
an immediate" would be very much appreciated.
But in the end, the final version should not waste all the work that
went into reviewing the series.
Thanks.
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-13 23:56 ` Junio C Hamano
@ 2024-11-14 10:04 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-14 10:04 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Toon Claes, git, me, peff
[-- Attachment #1: Type: text/plain, Size: 648 bytes --]
Junio C Hamano <gitster@pobox.com> writes:
> karthik nayak <karthik.188@gmail.com> writes:
>
>> Indeed. I'll add it in locally but will avoid a re-roll just for the
>> typo.
>
> FWIW, I'd _strongly_ prefer people *not* phrase it that way.
>
> It is very much sensible to refrain sending an another reroll
> immediately only to correct a small typo. IOW, "avoid a" -> "avoid
> an immediate" would be very much appreciated.
>
> But in the end, the final version should not waste all the work that
> went into reviewing the series.
>
Thanks for pointing it out, I agree and didn't really consider how that
seems. I will be more mindful!
> Thanks.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-13 12:41 ` Toon Claes
2024-11-13 13:04 ` karthik nayak
@ 2024-11-20 22:30 ` Taylor Blau
2024-11-21 10:20 ` karthik nayak
1 sibling, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-11-20 22:30 UTC (permalink / raw)
To: Toon Claes; +Cc: Karthik Nayak, git, peff, gitster
On Wed, Nov 13, 2024 at 01:41:18PM +0100, Toon Claes wrote:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
> > [snip]
> >
> > diff --git a/object-store-ll.h b/object-store-ll.h
> > index 53b8e693b1..538f2c60cb 100644
> > --- a/object-store-ll.h
> > +++ b/object-store-ll.h
> > @@ -10,6 +10,7 @@
> > struct oidmap;
> > struct oidtree;
> > struct strbuf;
> > +struct repository;
> >
> > struct object_directory {
> > struct object_directory *next;
> > @@ -135,6 +136,10 @@ struct packed_git {
> > */
> > const uint32_t *mtimes_map;
> > size_t mtimes_size;
> > +
> > + /* repo dentoes the repository this packed file belongs to */
>
> Small typo here, I think you mean "denotes".
Likewise for "packed file", which should be "packfile".
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 1/9] packfile: add repository to struct `packed_git`
2024-11-20 22:30 ` Taylor Blau
@ 2024-11-21 10:20 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-21 10:20 UTC (permalink / raw)
To: Taylor Blau, Toon Claes; +Cc: git, peff, gitster
[-- Attachment #1: Type: text/plain, Size: 862 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Wed, Nov 13, 2024 at 01:41:18PM +0100, Toon Claes wrote:
>> Karthik Nayak <karthik.188@gmail.com> writes:
>>
>> > [snip]
>> >
>> > diff --git a/object-store-ll.h b/object-store-ll.h
>> > index 53b8e693b1..538f2c60cb 100644
>> > --- a/object-store-ll.h
>> > +++ b/object-store-ll.h
>> > @@ -10,6 +10,7 @@
>> > struct oidmap;
>> > struct oidtree;
>> > struct strbuf;
>> > +struct repository;
>> >
>> > struct object_directory {
>> > struct object_directory *next;
>> > @@ -135,6 +136,10 @@ struct packed_git {
>> > */
>> > const uint32_t *mtimes_map;
>> > size_t mtimes_size;
>> > +
>> > + /* repo dentoes the repository this packed file belongs to */
>>
>> Small typo here, I think you mean "denotes".
>
> Likewise for "packed file", which should be "packfile".
Indeed, will fix both in the next version.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v7 2/9] packfile: use `repository` from `packed_git` directly
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
` (8 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v7 3/9] packfile: pass `repository` to static function in the file
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 1/9] packfile: add repository to struct `packed_git` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 2/9] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (7 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
Some of the static functions in the `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v7 4/9] packfile: pass down repository to `odb_pack_name`
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (2 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 3/9] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (6 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 6744e18409..420f1566f0 100644
--- a/http.c
+++ b/http.c
@@ -2581,7 +2581,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v7 5/9] packfile: pass down repository to `has_object[_kept]_pack`
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (3 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 4/9] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (5 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v7 6/9] packfile: pass down repository to `for_each_packed_object`
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (4 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 5/9] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-20 22:38 ` Taylor Blau
2024-11-11 11:14 ` [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (4 subsequent siblings)
10 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index 538f2c60cb..bcfae2e1bf 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v7 6/9] packfile: pass down repository to `for_each_packed_object`
2024-11-11 11:14 ` [PATCH v7 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-11-20 22:38 ` Taylor Blau
2024-11-20 22:48 ` [PATCH] packfile.c: remove unnecessary prepare_packed_git() call Taylor Blau
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-11-20 22:38 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff, gitster
On Mon, Nov 11, 2024 at 12:14:06PM +0100, Karthik Nayak wrote:
> diff --git a/packfile.c b/packfile.c
> index e7dd270217..5e8019b1fe 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
> return r;
> }
>
> -int for_each_packed_object(each_packed_object_fn cb, void *data,
> - enum for_each_object_flags flags)
> +int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
> + void *data, enum for_each_object_flags flags)
> {
> struct packed_git *p;
> int r = 0;
> int pack_errors = 0;
>
> - prepare_packed_git(the_repository);
> - for (p = get_all_packs(the_repository); p; p = p->next) {
> + prepare_packed_git(repo);
> + for (p = get_all_packs(repo); p; p = p->next) {
> if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
> continue;
> if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
Not the fault of your series, but this prepare_packed_git() call is
unnecessary, since it is the first thing that get_all_packs() does when
it executes.
I suspect that this call comes from way back in 660c889e46 (sha1_file:
add for_each iterators for loose and packed objects, 2014-10-15). It
could have been removed in 454ea2e4d7 (treewide: use get_all_packs,
2018-08-20), but I think that patch was a straightforward conversion
that did not inspect each individual change.
Anyway, nothing to do immediately here, but just something I saw when
reviewing and figured was worth writing down somewhere.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH] packfile.c: remove unnecessary prepare_packed_git() call
2024-11-20 22:38 ` Taylor Blau
@ 2024-11-20 22:48 ` Taylor Blau
2024-11-21 9:13 ` Jeff King
0 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-11-20 22:48 UTC (permalink / raw)
To: git; +Cc: Jeff King, Junio C Hamano, Derrick Stolee, Karthik Nayak
In 454ea2e4d7 (treewide: use get_all_packs, 2018-08-20) we converted
existing calls to both:
- get_packed_git(), as well as
- the_repository->objects->packed_git
to instead use the new get_all_packs() function.
In the instance that this commit addresses, there was a preceding call
to prepare_packed_git(), which dates all the way back to 660c889e46
(sha1_file: add for_each iterators for loose and packed objects,
2014-10-15) when its caller (for_each_packed_object()) was first
introduced.
This call could have been removed in 454ea2e4d7, since get_all_packs()
itself calls prepare_packed_git(). But the translation in 454ea2e4d7 was
(to the best of my knowledge) a find-and-replace rather than inspecting
each individual caller.
Having an extra prepare_packed_git() call here is harmless, since it
will notice that we have already set the 'packed_git_initialized' field
and the call will be a noop. So we're only talking about a few dozen CPU
cycles to set up and tear down the stack frame.
But having a lone prepare_packed_git() call immediately before a call to
get_all_packs() confused me, so let's remove it as redundant to avoid
more confusion in the future.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
packfile.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/packfile.c b/packfile.c
index 724ce8e977..5585075023 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2223,7 +2223,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
int r = 0;
int pack_errors = 0;
- prepare_packed_git(repo);
for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
base-commit: 78e4bc3efc49ee4c9ec1ce14117c2e7d99999234
--
This applies on top of Karthik's series, and was something small I
noticed while reading it.
2.47.0.237.gc601277f4c4
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH] packfile.c: remove unnecessary prepare_packed_git() call
2024-11-20 22:48 ` [PATCH] packfile.c: remove unnecessary prepare_packed_git() call Taylor Blau
@ 2024-11-21 9:13 ` Jeff King
0 siblings, 0 replies; 184+ messages in thread
From: Jeff King @ 2024-11-21 9:13 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Derrick Stolee, Karthik Nayak
On Wed, Nov 20, 2024 at 05:48:51PM -0500, Taylor Blau wrote:
> In the instance that this commit addresses, there was a preceding call
> to prepare_packed_git(), which dates all the way back to 660c889e46
> (sha1_file: add for_each iterators for loose and packed objects,
> 2014-10-15) when its caller (for_each_packed_object()) was first
> introduced.
>
> This call could have been removed in 454ea2e4d7, since get_all_packs()
> itself calls prepare_packed_git(). But the translation in 454ea2e4d7 was
> (to the best of my knowledge) a find-and-replace rather than inspecting
> each individual caller.
Yeah, I think that describes what happened.
> Having an extra prepare_packed_git() call here is harmless, since it
> will notice that we have already set the 'packed_git_initialized' field
> and the call will be a noop. So we're only talking about a few dozen CPU
> cycles to set up and tear down the stack frame.
>
> But having a lone prepare_packed_git() call immediately before a call to
> get_all_packs() confused me, so let's remove it as redundant to avoid
> more confusion in the future.
Agreed. I think this is worth doing.
I briefly grepped for other cases. This one confused me:
builtin/gc.c=1272=static off_t get_auto_pack_size(void)
--
builtin/gc.c-1292- reprepare_packed_git(r);
builtin/gc.c:1293: for (p = get_all_packs(r); p; p = p->next) {
It's not a noop because it's calling the reprepare() function, which
will re-check the directory. But why? Are we expecting that something
changed? This is called only when making the midx, so maybe it's trying
to refresh the set of packs after repacking. But that seems like
something that should happen explicitly, not as a side effect of an
otherwise read-only function.
Removing it still passes the tests. So I dunno. It looks superfluous to
me, but it's perhaps more risky than the one you identified.
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (5 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 6/9] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-20 22:52 ` Taylor Blau
2024-11-11 11:14 ` [PATCH v7 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (3 subsequent siblings)
10 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable to the stack of each of the subsystems where it is used.
In `gc.c` we add it to the `gc_config` struct and also the constructor
function. In `index-pack.c` we add it to the `pack_idx_option` struct
and its constructor. Finally, in `packfile.c` we dynamically retrieve
this value from the repository config, since the value is only used once
in the entire subsystem.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 8 +++++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 1 +
packfile.c | 13 +++++++++++--
9 files changed, 29 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..09802eb989 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ size_t delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -168,6 +170,7 @@ static void gc_config(struct gc_config *cfg)
{
const char *value;
char *owned = NULL;
+ unsigned long ulongval;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
@@ -206,6 +209,9 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ if (!git_config_get_ulong("core.deltabasecachelimit", &ulongval))
+ cfg->delta_base_cache_limit = ulongval;
+
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
cfg->repack_filter = owned;
@@ -416,7 +422,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..1a33751565 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,7 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+ unsigned long delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..2ae35dd03f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -24,6 +24,8 @@
#include "commit-graph.h"
#include "pack-revindex.h"
#include "promisor-remote.h"
+#include "config.h"
+#include "pack-objects.h"
char *odb_pack_name(struct repository *r, struct strbuf *buf,
const unsigned char *hash, const char *ext)
@@ -1496,7 +1498,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
+
+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
write_pack_access_log(p, obj_offset);
@@ -1878,7 +1885,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base,
+ base_size, delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-11-11 11:14 ` [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-20 22:52 ` Taylor Blau
2024-11-21 9:06 ` Jeff King
2024-11-21 13:10 ` karthik nayak
0 siblings, 2 replies; 184+ messages in thread
From: Taylor Blau @ 2024-11-20 22:52 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff, gitster
On Mon, Nov 11, 2024 at 12:14:07PM +0100, Karthik Nayak wrote:
> @@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
> struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
> int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
> int base_from_cache = 0;
> + unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
> +
> + repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
>
> write_pack_access_log(p, obj_offset);
>
Hmm. This repo_config_get_ulong() call will look for the configset entry
in a hashmap which is faster than parsing the configuration file from
scratch every time, but still expensive for my taste in a function as
hot as unpack_entry().
Should this also go in the_repository->settings instead? That way we
have a single field access instead of a hashmap lookup (with multiple
layers of function calls between us and the actual lookup).
Thanks,
Taylor
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-11-20 22:52 ` Taylor Blau
@ 2024-11-21 9:06 ` Jeff King
2024-11-21 13:10 ` karthik nayak
1 sibling, 0 replies; 184+ messages in thread
From: Jeff King @ 2024-11-21 9:06 UTC (permalink / raw)
To: Taylor Blau; +Cc: Karthik Nayak, git, gitster
On Wed, Nov 20, 2024 at 05:52:58PM -0500, Taylor Blau wrote:
> On Mon, Nov 11, 2024 at 12:14:07PM +0100, Karthik Nayak wrote:
> > @@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
> > struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
> > int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
> > int base_from_cache = 0;
> > + unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
> > +
> > + repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
> >
> > write_pack_access_log(p, obj_offset);
> >
>
> Hmm. This repo_config_get_ulong() call will look for the configset entry
> in a hashmap which is faster than parsing the configuration file from
> scratch every time, but still expensive for my taste in a function as
> hot as unpack_entry().
>
> Should this also go in the_repository->settings instead? That way we
> have a single field access instead of a hashmap lookup (with multiple
> layers of function calls between us and the actual lookup).
Good catch. I missed this one when making a similar argument for the
packed_git variables.
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable
2024-11-20 22:52 ` Taylor Blau
2024-11-21 9:06 ` Jeff King
@ 2024-11-21 13:10 ` karthik nayak
1 sibling, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-21 13:10 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, peff, gitster
[-- Attachment #1: Type: text/plain, Size: 1406 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Nov 11, 2024 at 12:14:07PM +0100, Karthik Nayak wrote:
>> @@ -1697,6 +1701,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
>> struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
>> int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
>> int base_from_cache = 0;
>> + unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
>> +
>> + repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
>>
>> write_pack_access_log(p, obj_offset);
>>
>
> Hmm. This repo_config_get_ulong() call will look for the configset entry
> in a hashmap which is faster than parsing the configuration file from
> scratch every time, but still expensive for my taste in a function as
> hot as unpack_entry().
>
Thanks for pointing that out. I don't know the object database code
well, so any input here is appreciated.
> Should this also go in the_repository->settings instead? That way we
> have a single field access instead of a hashmap lookup (with multiple
> layers of function calls between us and the actual lookup).
>
I think that is the best way, though we'll still have to add the config
to the gc config struct, since there is no repository available there.
Still, I think it gets us one step closer towards cleaner config.
> Thanks,
> Taylor
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v7 8/9] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (6 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 7/9] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-11 11:14 ` [PATCH v7 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
` (2 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used
in this file, let's change them from being global config variables to
local variables for the subsystem.
With this, we rid `packfile.c` of all global variable usage, which
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
config.c | 17 -----------------
environment.c | 2 --
packfile.c | 23 +++++++++++++++--------
packfile.h | 2 +-
repo-settings.c | 14 ++++++++++++++
repo-settings.h | 5 +++++
7 files changed, 37 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 2ae35dd03f..724ce8e977 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -48,15 +47,15 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+void pack_report(struct repository *repo)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(repo->settings.packed_git_window_size),
+ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -652,8 +651,15 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
+ struct repo_settings *settings;
+
+ /* lazy load the settings in case it hasn't been setup */
+ prepare_repo_settings(p->repo);
+ settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
@@ -661,11 +667,12 @@ unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > settings->packed_git_window_size)
+ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
diff --git a/repo-settings.c b/repo-settings.c
index 4699b4b365..0d875fdd86 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -26,6 +26,7 @@ void prepare_repo_settings(struct repository *r)
const char *strval;
int manyfiles;
int read_changed_paths;
+ unsigned long longval;
if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository");
@@ -123,6 +124,19 @@ void prepare_repo_settings(struct repository *r)
* removed.
*/
r->settings.command_requires_full_index = 1;
+
+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
+ longval /= pgsz_x2;
+ if (longval < 1)
+ longval = 1;
+ r->settings.packed_git_window_size = longval * pgsz_x2;
+ }
+
+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
+ r->settings.packed_git_limit = longval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 51d6156a11..b22d6438e2 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -57,12 +57,17 @@ struct repo_settings {
int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
+
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
.core_untracked_cache = UNTRACKED_CACHE_KEEP, \
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v7 9/9] midx: add repository to `multi_pack_index` struct
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (7 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 8/9] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-11 11:14 ` Karthik Nayak
2024-11-12 8:30 ` [PATCH v7 0/9] packfile: avoid using the 'the_repository' global variable Jeff King
2024-11-20 22:55 ` Taylor Blau
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-11 11:14 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v7 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (8 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 9/9] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-11-12 8:30 ` Jeff King
2024-11-13 13:03 ` karthik nayak
2024-11-20 22:55 ` Taylor Blau
10 siblings, 1 reply; 184+ messages in thread
From: Jeff King @ 2024-11-12 8:30 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me, gitster
On Mon, Nov 11, 2024 at 12:14:00PM +0100, Karthik Nayak wrote:
> Changes in v7:
> - Cleanup stale commit message.
> - Add missing space in `if` statement.
> - Fix typo s/incase/in case/.
Thanks, I think this addresses all of the comments I had on previous
versions.
-Peff
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
` (9 preceding siblings ...)
2024-11-12 8:30 ` [PATCH v7 0/9] packfile: avoid using the 'the_repository' global variable Jeff King
@ 2024-11-20 22:55 ` Taylor Blau
2024-11-21 13:12 ` karthik nayak
10 siblings, 1 reply; 184+ messages in thread
From: Taylor Blau @ 2024-11-20 22:55 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, peff, gitster
On Mon, Nov 11, 2024 at 12:14:00PM +0100, Karthik Nayak wrote:
> Karthik Nayak (9):
> packfile: add repository to struct `packed_git`
> packfile: use `repository` from `packed_git` directly
> packfile: pass `repository` to static function in the file
> packfile: pass down repository to `odb_pack_name`
> packfile: pass down repository to `has_object[_kept]_pack`
> packfile: pass down repository to `for_each_packed_object`
> config: make `delta_base_cache_limit` a non-global variable
> config: make `packed_git_(limit|window_size)` non-global variables
> midx: add repository to `multi_pack_index` struct
I reviewed this round, and think that it is looking very close. There
are a couple of typofixes that I and others have noticed, which are
minor (but I think in aggregate should merit a reroll).
I did have a concern about the conversion of delta_base_cache_limit to
be a non-global variable, since I think we're determining that value
from within unpack_entry() in a more expensive manner than is possible.
So I think that merits some investigation, and will likely result in
some changes that we should consider before merging.
Karthik: if you do end up rerolling this, please feel free to include
the patch I sent in [1] on top, which should make the maintainer's life
a bit easier than adding another topic dependent upon this one ;-).
Thanks,
Taylor
[1]: https://lore.kernel.org/git/884ca9770d1fb1e84962b1f700b1ce4adce6321c.1732142889.git.me@ttaylorr.com/
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v7 0/9] packfile: avoid using the 'the_repository' global variable
2024-11-20 22:55 ` Taylor Blau
@ 2024-11-21 13:12 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-21 13:12 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, peff, gitster
[-- Attachment #1: Type: text/plain, Size: 1793 bytes --]
Taylor Blau <me@ttaylorr.com> writes:
> On Mon, Nov 11, 2024 at 12:14:00PM +0100, Karthik Nayak wrote:
>> Karthik Nayak (9):
>> packfile: add repository to struct `packed_git`
>> packfile: use `repository` from `packed_git` directly
>> packfile: pass `repository` to static function in the file
>> packfile: pass down repository to `odb_pack_name`
>> packfile: pass down repository to `has_object[_kept]_pack`
>> packfile: pass down repository to `for_each_packed_object`
>> config: make `delta_base_cache_limit` a non-global variable
>> config: make `packed_git_(limit|window_size)` non-global variables
>> midx: add repository to `multi_pack_index` struct
>
> I reviewed this round, and think that it is looking very close. There
> are a couple of typofixes that I and others have noticed, which are
> minor (but I think in aggregate should merit a reroll).
>
Thanks for the review, indeed, I will be re-rolling with the fixes
discussed.
> I did have a concern about the conversion of delta_base_cache_limit to
> be a non-global variable, since I think we're determining that value
> from within unpack_entry() in a more expensive manner than is possible.
>
> So I think that merits some investigation, and will likely result in
> some changes that we should consider before merging.
>
Yup, I'll move it to the repository settings struct and this should help
alleviate the perf issue.
> Karthik: if you do end up rerolling this, please feel free to include
> the patch I sent in [1] on top, which should make the maintainer's life
> a bit easier than adding another topic dependent upon this one ;-).
>
That makes sense, I'll add it in.
> Thanks,
> Taylor
>
> [1]: https://lore.kernel.org/git/884ca9770d1fb1e84962b1f700b1ce4adce6321c.1732142889.git.me@ttaylorr.com/
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v8 00/10] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (26 preceding siblings ...)
2024-11-11 11:14 ` [PATCH v7 " Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
` (9 more replies)
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
29 siblings, 10 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all usages of it by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`, we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the next two patches deal with global config values. These values are
localized. The last patch removes an unnecessary call to `prepare_packed_git()`.
For v5 onwards, I've rebased the series off the master: 8f8d6eee53 (The
seventh batch, 2024-11-01), as a dependency for this series 'jk/dumb-http-finalize'
was merged to master. I've found no conflicts while merging with seen & next. But
since this series does touch multiple files, there could be future conflicts.
Changes in v8:
- Fix typos in comments
- For packfile.c use delta_base_cache_limit from the repository
settings, this avoids loading the config in hot paths.
- Rename `longval` to `ulongval` to better signify the type.
Changes in v7:
- Cleanup stale commit message.
- Add missing space in `if` statement.
- Fix typo s/incase/in case/.
Changes in v6:
- Lazy load repository settings in packfile.c. This ensures that the settings are
available for sure and we do not rely on callees setting it.
- Use `size_t` for `delta_base_cache_limit`.
Changes in v5:
- Move packed_git* settings to repo_settings to ensure we don't keep reparsing the
settings in `use_pack`.
Changes in v4:
- Renamed the repository field within `packed_git` and `multi_pack_index` from
`r` to `repo`, while keeping function parameters to be `r`.
- Fixed bad braces.
Changes in v3:
- Improved commit messages. In the first commit to talk about how packed_git
struct could also be part of the alternates of a repository. In the 7th commit
to talk about the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
Taylor Blau (1):
packfile.c: remove unnecessary prepare_packed_git() call
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++---
builtin/gc.c | 8 ++-
builtin/index-pack.c | 20 ++++--
builtin/pack-objects.c | 11 +--
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 ------
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 ++-
pack-bitmap.c | 90 ++++++++++++++----------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 2 +
packfile.c | 144 ++++++++++++++++++++++-----------------
packfile.h | 18 +++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
repo-settings.c | 18 +++++
repo-settings.h | 7 ++
revision.c | 13 ++--
tag.c | 2 +-
36 files changed, 271 insertions(+), 190 deletions(-)
Range-diff against v7:
1: 6c00e25c86 ! 1: d1fdd6996a packfile: add repository to struct `packed_git`
@@ object-store-ll.h: struct packed_git {
const uint32_t *mtimes_map;
size_t mtimes_size;
+
-+ /* repo dentoes the repository this packed file belongs to */
++ /* repo denotes the repository this packfile belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
2: 70fc8a79af = 2: 65c09858ce packfile: use `repository` from `packed_git` directly
3: 167a1f3a11 = 3: 80632934d1 packfile: pass `repository` to static function in the file
4: b7cfe78217 = 4: 67d71eab83 packfile: pass down repository to `odb_pack_name`
5: 5566f5554c = 5: ee210fa153 packfile: pass down repository to `has_object[_kept]_pack`
6: 1b26e45a9b = 6: 8db7094f4e packfile: pass down repository to `for_each_packed_object`
7: 1bdc34f4d8 ! 7: d1b6e8801b config: make `delta_base_cache_limit` a non-global variable
@@ Commit message
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
- variable to the stack of each of the subsystems where it is used.
+ variable independently to the subsystems where it is used.
- In `gc.c` we add it to the `gc_config` struct and also the constructor
- function. In `index-pack.c` we add it to the `pack_idx_option` struct
- and its constructor. Finally, in `packfile.c` we dynamically retrieve
- this value from the repository config, since the value is only used once
- in the entire subsystem.
+ First, add the setting to the `repo_settings` struct, this provides
+ access to the config in places where the repository is available. Use
+ this in `packfile.c`.
+
+ In `index-pack.c` we add it to the `pack_idx_option` struct and its
+ constructor. While the repository struct is available here, it may not
+ be set because `git index-pack` can be used without a repository.
+
+ In `gc.c` add it to the `gc_config` struct and also the constructor
+ function. The gc functions currently do not have direct access to a
+ repository struct.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
@@ pack.h: struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
-+ unsigned long delta_base_cache_limit;
++
++ size_t delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
## packfile.c ##
-@@
- #include "commit-graph.h"
- #include "pack-revindex.h"
- #include "promisor-remote.h"
-+#include "config.h"
-+#include "pack-objects.h"
-
- char *odb_pack_name(struct repository *r, struct strbuf *buf,
- const unsigned char *hash, const char *ext)
@@ packfile.c: void clear_delta_base_cache(void)
}
@@ packfile.c: void clear_delta_base_cache(void)
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ packfile.c: void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
- struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
-+ unsigned long delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
-+
-+ repo_config_get_ulong(r, "core.deltabasecachelimit", &delta_base_cache_limit);
++ prepare_repo_settings(p->repo);
++
write_pack_access_log(p, obj_offset);
+ /* PHASE 1: drill down to the innermost base object */
@@ packfile.c: void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
-+ add_delta_base_cache(p, base_obj_offset, base,
-+ base_size, delta_base_cache_limit,
++ add_delta_base_cache(p, base_obj_offset, base, base_size,
++ p->repo->settings.delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
+
+ ## repo-settings.c ##
+@@
+ #include "repo-settings.h"
+ #include "repository.h"
+ #include "midx.h"
++#include "pack-objects.h"
+
+ static void repo_cfg_bool(struct repository *r, const char *key, int *dest,
+ int def)
+@@ repo-settings.c: void prepare_repo_settings(struct repository *r)
+ const char *strval;
+ int manyfiles;
+ int read_changed_paths;
++ unsigned long ulongval;
+
+ if (!r->gitdir)
+ BUG("Cannot add settings for uninitialized repository");
+@@ repo-settings.c: void prepare_repo_settings(struct repository *r)
+ * removed.
+ */
+ r->settings.command_requires_full_index = 1;
++
++ if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
++ r->settings.delta_base_cache_limit = ulongval;
+ }
+
+ enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
+
+ ## repo-settings.h ##
+@@ repo-settings.h: struct repo_settings {
+
+ int core_multi_pack_index;
+ int warn_ambiguous_refs; /* lazily loaded via accessor */
++
++ size_t delta_base_cache_limit;
+ };
+ #define REPO_SETTINGS_INIT { \
+ .index_version = -1, \
+ .core_untracked_cache = UNTRACKED_CACHE_KEEP, \
+ .fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
+ .warn_ambiguous_refs = -1, \
++ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
+ }
+
+ void prepare_repo_settings(struct repository *r);
8: 7b6baa89ac ! 8: 30a52f192f config: make `packed_git_(limit|window_size)` non-global variables
@@ packfile.h: unsigned long repo_approximate_object_count(struct repository *r);
## repo-settings.c ##
@@ repo-settings.c: void prepare_repo_settings(struct repository *r)
- const char *strval;
- int manyfiles;
- int read_changed_paths;
-+ unsigned long longval;
- if (!r->gitdir)
- BUG("Cannot add settings for uninitialized repository");
-@@ repo-settings.c: void prepare_repo_settings(struct repository *r)
- * removed.
- */
- r->settings.command_requires_full_index = 1;
+ if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
+ r->settings.delta_base_cache_limit = ulongval;
+
-+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &longval)) {
++ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &ulongval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
-+ longval /= pgsz_x2;
-+ if (longval < 1)
-+ longval = 1;
-+ r->settings.packed_git_window_size = longval * pgsz_x2;
++ ulongval /= pgsz_x2;
++ if (ulongval < 1)
++ ulongval = 1;
++ r->settings.packed_git_window_size = ulongval * pgsz_x2;
+ }
+
-+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &longval))
-+ r->settings.packed_git_limit = longval;
++ if (!repo_config_get_ulong(r, "core.packedgitlimit", &ulongval))
++ r->settings.packed_git_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
## repo-settings.h ##
@@ repo-settings.h: struct repo_settings {
-
- int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
-+
+
+ size_t delta_base_cache_limit;
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
- .core_untracked_cache = UNTRACKED_CACHE_KEEP, \
+@@ repo-settings.h: struct repo_settings {
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
9: a3667d87ec = 9: 2fe5d2506f midx: add repository to `multi_pack_index` struct
-: ---------- > 10: 05989c2e27 packfile.c: remove unnecessary prepare_packed_git() call
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v8 01/10] packfile: add repository to struct `packed_git`
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 02/10] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (8 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and that we currently use 'the_repository' anyway, we should be OK with
this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 58242b9d2d..6744e18409 100644
--- a/http.c
+++ b/http.c
@@ -2439,7 +2439,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..d46cd0e654 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packfile belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 02/10] packfile: use `repository` from `packed_git` directly
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
` (7 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 03/10] packfile: pass `repository` to static function in the file
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 02/10] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 04/10] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (6 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
Some of the static functions in `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 04/10] packfile: pass down repository to `odb_pack_name`
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (2 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 05/10] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (5 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 6744e18409..420f1566f0 100644
--- a/http.c
+++ b/http.c
@@ -2581,7 +2581,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 05/10] packfile: pass down repository to `has_object[_kept]_pack`
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (3 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 04/10] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 06/10] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (4 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 06/10] packfile: pass down repository to `for_each_packed_object`
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (4 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 05/10] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (3 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index d46cd0e654..cd3bd5bd99 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 07/10] config: make `delta_base_cache_limit` a non-global variable
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (5 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 06/10] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-26 7:25 ` Junio C Hamano
2024-11-22 10:08 ` [PATCH v8 08/10] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (2 subsequent siblings)
9 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable independently to the subsystems where it is used.
First, add the setting to the `repo_settings` struct; this provides
access to the config in places where the repository is available. Use
this in `packfile.c`.
In `index-pack.c` we add it to the `pack_idx_option` struct and its
constructor. While the repository struct is available here, it may not
be set because `git index-pack` can be used without a repository.
In `gc.c` add it to the `gc_config` struct and also the constructor
function. The gc functions currently do not have direct access to a
repository struct.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 8 +++++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 2 ++
packfile.c | 10 ++++++++--
repo-settings.c | 5 +++++
repo-settings.h | 3 +++
11 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..09802eb989 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,7 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ size_t delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +154,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -168,6 +170,7 @@ static void gc_config(struct gc_config *cfg)
{
const char *value;
char *owned = NULL;
+ unsigned long ulongval;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
@@ -206,6 +209,9 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ if (!git_config_get_ulong("core.deltabasecachelimit", &ulongval))
+ cfg->delta_base_cache_limit = ulongval;
+
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
cfg->repack_filter = owned;
@@ -416,7 +422,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..a8da040629 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,8 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+
+ size_t delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..64248ca664 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1496,7 +1496,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1698,6 +1700,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ prepare_repo_settings(p->repo);
+
write_pack_access_log(p, obj_offset);
/* PHASE 1: drill down to the innermost base object */
@@ -1878,7 +1882,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base, base_size,
+ p->repo->settings.delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
diff --git a/repo-settings.c b/repo-settings.c
index 4699b4b365..acc27eb8fe 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -3,6 +3,7 @@
#include "repo-settings.h"
#include "repository.h"
#include "midx.h"
+#include "pack-objects.h"
static void repo_cfg_bool(struct repository *r, const char *key, int *dest,
int def)
@@ -26,6 +27,7 @@ void prepare_repo_settings(struct repository *r)
const char *strval;
int manyfiles;
int read_changed_paths;
+ unsigned long ulongval;
if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository");
@@ -123,6 +125,9 @@ void prepare_repo_settings(struct repository *r)
* removed.
*/
r->settings.command_requires_full_index = 1;
+
+ if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
+ r->settings.delta_base_cache_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 51d6156a11..10a6f7ed64 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -57,12 +57,15 @@ struct repo_settings {
int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
+
+ size_t delta_base_cache_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
.core_untracked_cache = UNTRACKED_CACHE_KEEP, \
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v8 07/10] config: make `delta_base_cache_limit` a non-global variable
2024-11-22 10:08 ` [PATCH v8 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-26 7:25 ` Junio C Hamano
2024-11-26 10:54 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Junio C Hamano @ 2024-11-26 7:25 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, me, peff
Karthik Nayak <karthik.188@gmail.com> writes:
> In `gc.c` add it to the `gc_config` struct and also the constructor
> function. The gc functions currently do not have direct access to a
> repository struct.
We should remember to remove this member from gc_config when we pass
the repository through the callchain. As a built-in, cmd_gc()
should already be receiving the repository as its parameter, so it
may not have to wait for a long time before we are ready to do so.
Perhaps have a comment next to the member's definition to remind us?
> These changes are made to remove the usage of `delta_base_cache_limit`
> as a global variable in `packfile.c`. This brings us one step closer to
> removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
> which we complete in the next patch.
OK.
IIUC, Taylor's comment to avoid repeated calls to configuration
layer to obtain delta_base_cache_limit, aside from typofixes, was
the only thing remaining for this topic? If so, everything is
looking really good.
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v8 07/10] config: make `delta_base_cache_limit` a non-global variable
2024-11-26 7:25 ` Junio C Hamano
@ 2024-11-26 10:54 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-26 10:54 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, me, peff
[-- Attachment #1: Type: text/plain, Size: 1199 bytes --]
Junio C Hamano <gitster@pobox.com> writes:
> Karthik Nayak <karthik.188@gmail.com> writes:
>
>> In `gc.c` add it to the `gc_config` struct and also the constructor
>> function. The gc functions currently do not have direct access to a
>> repository struct.
>
> We should remember to remove this member from gc_config when we pass
> the repository through the callchain. As a built-in, cmd_gc()
> should already be receiving the repository as its parameter, so it
> may not have to wait for a long time before we are ready to do so.
>
> Perhaps have a comment next to the member's definition to remind us?
>
Yeah, that seems like a good idea.
>> These changes are made to remove the usage of `delta_base_cache_limit`
>> as a global variable in `packfile.c`. This brings us one step closer to
>> removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
>> which we complete in the next patch.
>
> OK.
>
> IIUC, Taylor's comment to avoid repeated calls to configuration
> layer to obtain delta_base_cache_limit, aside from typofixes, was
> the only thing remaining for this topic? If so, everything is
> looking really good.
Thanks, will send a new version with the comment inlined.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v8 08/10] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (6 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 09/10] midx: add repository to `multi_pack_index` struct Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 10/10] packfile.c: remove unnecessary prepare_packed_git() call Karthik Nayak
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used in
this file, let's change them from being global config variables to
local variables for the subsystem.
With this, we rid `packfile.c` of all global variable usage, which
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
config.c | 17 -----------------
environment.c | 2 --
packfile.c | 23 +++++++++++++++--------
packfile.h | 2 +-
repo-settings.c | 13 +++++++++++++
repo-settings.h | 4 ++++
7 files changed, 35 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 64248ca664..2e0e28c7de 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -46,15 +45,15 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+void pack_report(struct repository *repo)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(repo->settings.packed_git_window_size),
+ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -650,8 +649,15 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
+ struct repo_settings *settings;
+
+ /* lazy load the settings in case it hasn't been setup */
+ prepare_repo_settings(p->repo);
+ settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
@@ -659,11 +665,12 @@ unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > settings->packed_git_window_size)
+ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
diff --git a/repo-settings.c b/repo-settings.c
index acc27eb8fe..9d16d5399e 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -128,6 +128,19 @@ void prepare_repo_settings(struct repository *r)
if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
r->settings.delta_base_cache_limit = ulongval;
+
+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &ulongval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
+ ulongval /= pgsz_x2;
+ if (ulongval < 1)
+ ulongval = 1;
+ r->settings.packed_git_window_size = ulongval * pgsz_x2;
+ }
+
+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &ulongval))
+ r->settings.packed_git_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 10a6f7ed64..93ea0c3274 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -59,6 +59,8 @@ struct repo_settings {
int warn_ambiguous_refs; /* lazily loaded via accessor */
size_t delta_base_cache_limit;
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
@@ -66,6 +68,8 @@ struct repo_settings {
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
.delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 09/10] midx: add repository to `multi_pack_index` struct
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (7 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 08/10] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
2024-11-22 10:08 ` [PATCH v8 10/10] packfile.c: remove unnecessary prepare_packed_git() call Karthik Nayak
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v8 10/10] packfile.c: remove unnecessary prepare_packed_git() call
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
` (8 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 09/10] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-11-22 10:08 ` Karthik Nayak
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-22 10:08 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
From: Taylor Blau <me@ttaylorr.com>
In 454ea2e4d7 (treewide: use get_all_packs, 2018-08-20) we converted
existing calls to both:
- get_packed_git(), as well as
- the_repository->objects->packed_git
, to instead use the new get_all_packs() function.
In the instance that this commit addresses, there was a preceding call
to prepare_packed_git(), which dates all the way back to 660c889e46
(sha1_file: add for_each iterators for loose and packed objects,
2014-10-15) when its caller (for_each_packed_object()) was first
introduced.
This call could have been removed in 454ea2e4d7, since get_all_packs()
itself calls prepare_packed_git(). But the translation in 454ea2e4d7 was
(to the best of my knowledge) a find-and-replace rather than inspecting
each individual caller.
Having an extra prepare_packed_git() call here is harmless, since it
will notice that we have already set the 'packed_git_initialized' field
and the call will be a noop. So we're only talking about a few dozen CPU
cycles to set up and tear down the stack frame.
But having a lone prepare_packed_git() call immediately before a call to
get_all_packs() confused me, so let's remove it as redundant to avoid
more confusion in the future.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
packfile.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/packfile.c b/packfile.c
index 2e0e28c7de..9c4bd81a8c 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2220,7 +2220,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
int r = 0;
int pack_errors = 0;
- prepare_packed_git(repo);
for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (27 preceding siblings ...)
2024-11-22 10:08 ` [PATCH v8 00/10] " Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
` (9 more replies)
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
29 siblings, 10 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all usages of this by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`, we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the next two patches deal with global config values. These values are
localized. The last patch is the removal of an unnecessary call to `prepare_packed_git()`.
For v5 onwards, I've rebased the series off the master: 8f8d6eee53 (The
seventh batch, 2024-11-01), as a dependency for this series 'jk/dumb-http-finalize'
was merged to master. I've found no conflicts while merging with seen & next. But
since this series does touch multiple files, there could be future conflicts.
Changes in v9:
- Added a comment in gc_config to indicate that eventually the
`delta_base_cache_limit` variable should be used through repo_settings.
Changes in v8:
- Fix typos in comments
- For packfile.c use delta_base_cache_limit from the repository
settings, this avoids loading the config in hot paths.
- Rename `longval` to `ulongval` to better signify the type.
Changes in v7:
- Cleanup stale commit message.
- Add missing space in `if` statement.
- Fix typo s/incase/in case/.
Changes in v6:
- Lazy load repository settings in packfile.c. This ensures that the settings are
available for sure and we do not rely on callees setting it.
- Use `size_t` for `delta_base_cache_limit`.
Changes in v5:
- Move packed_git* settings to repo_settings to ensure we don't keep reparsing the
settings in `use_pack`.
Changes in v4:
- Renamed the repository field within `packed_git` and `multi_pack_index` from
`r` to `repo`, while keeping function parameters to be `r`.
- Fixed bad braces.
Changes in v3:
- Improved commit messages. In the first commit to talk about how packed_git
struct could also be part of the alternates of a repository. In the 7th commit
to talk about the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
Taylor Blau (1):
packfile.c: remove unnecessary prepare_packed_git() call
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++---
builtin/gc.c | 12 +++-
builtin/index-pack.c | 20 ++++--
builtin/pack-objects.c | 11 +--
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 ------
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 ++-
pack-bitmap.c | 90 ++++++++++++++----------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 2 +
packfile.c | 144 ++++++++++++++++++++++-----------------
packfile.h | 18 +++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
repo-settings.c | 18 +++++
repo-settings.h | 7 ++
revision.c | 13 ++--
tag.c | 2 +-
36 files changed, 275 insertions(+), 190 deletions(-)
Range-diff against v8:
-: ---------- > 1: d1fdd6996a packfile: add repository to struct `packed_git`
-: ---------- > 2: 65c09858ce packfile: use `repository` from `packed_git` directly
-: ---------- > 3: 80632934d1 packfile: pass `repository` to static function in the file
-: ---------- > 4: 67d71eab83 packfile: pass down repository to `odb_pack_name`
-: ---------- > 5: ee210fa153 packfile: pass down repository to `has_object[_kept]_pack`
-: ---------- > 6: 8db7094f4e packfile: pass down repository to `for_each_packed_object`
1: d1b6e8801b ! 7: a66494384d config: make `delta_base_cache_limit` a non-global variable
@@ builtin/gc.c: struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
++ /*
++ * Remove this member from gc_config once repo_settings is passed
++ * through the callchain.
++ */
+ size_t delta_base_cache_limit;
};
2: 30a52f192f = 8: bce9196f6b config: make `packed_git_(limit|window_size)` non-global variables
3: 2fe5d2506f = 9: c7fba8cf6a midx: add repository to `multi_pack_index` struct
4: 05989c2e27 = 10: d7f475fbd0 packfile.c: remove unnecessary prepare_packed_git() call
--
2.47.0
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v9 01/10] packfile: add repository to struct `packed_git`
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-27 9:24 ` Kristoffer Haugsbakk
2024-11-26 10:57 ` [PATCH v9 02/10] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (8 subsequent siblings)
9 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a pack file could be part of the alternates
of a repository, but considering that we only have one repository struct
and also that we currently anyways use 'the_repository', we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 58242b9d2d..6744e18409 100644
--- a/http.c
+++ b/http.c
@@ -2439,7 +2439,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..d46cd0e654 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packfile belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v9 01/10] packfile: add repository to struct `packed_git`
2024-11-26 10:57 ` [PATCH v9 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-27 9:24 ` Kristoffer Haugsbakk
2024-11-27 12:15 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Kristoffer Haugsbakk @ 2024-11-27 9:24 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, Taylor Blau, Jeff King, Junio C Hamano
On Tue, Nov 26, 2024, at 11:57, Karthik Nayak wrote:
> We do need to consider that a pack file could be part of the alternates
> of a repository, but considering that we only have one repository struct
> and also that we currently anyways use 'the_repository'. We should be
> OK with this change.
The “but considering” needs for a conclusion to come in the same
sentence. But it seems that the conclusion has been put off to the next
sentence: “We should be OK with this change”. That doesn’t read right
to me.
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v9 01/10] packfile: add repository to struct `packed_git`
2024-11-27 9:24 ` Kristoffer Haugsbakk
@ 2024-11-27 12:15 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-27 12:15 UTC (permalink / raw)
To: Kristoffer Haugsbakk; +Cc: git, Taylor Blau, Jeff King, Junio C Hamano
[-- Attachment #1: Type: text/plain, Size: 724 bytes --]
"Kristoffer Haugsbakk" <kristofferhaugsbakk@fastmail.com> writes:
> On Tue, Nov 26, 2024, at 11:57, Karthik Nayak wrote:
>> We do need to consider that a pack file could be part of the alternates
>> of a repository, but considering that we only have one repository struct
>> and also that we currently anyways use 'the_repository'. We should be
>> OK with this change.
>
> The “but considering” needs for a conclusion to come in the same
> sentence. But it seems that the conclusion has been put off to the next
> sentence: “We should be OK with this change”. That doesn’t read right
> to me.
Hmm, yeah, perhaps a comma right after the 'the_repository' instead of a
period would make it better.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v9 02/10] packfile: use `repository` from `packed_git` directly
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
` (7 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 03/10] packfile: pass `repository` to static function in the file
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 02/10] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-27 7:44 ` Kristoffer Haugsbakk
2024-11-26 10:57 ` [PATCH v9 04/10] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (6 subsequent siblings)
9 siblings, 1 reply; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
Some of the static functions in `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v9 03/10] packfile: pass `repository` to static function in the file
2024-11-26 10:57 ` [PATCH v9 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-11-27 7:44 ` Kristoffer Haugsbakk
2024-11-27 9:09 ` karthik nayak
0 siblings, 1 reply; 184+ messages in thread
From: Kristoffer Haugsbakk @ 2024-11-27 7:44 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, Taylor Blau, Jeff King, Junio C Hamano
On Tue, Nov 26, 2024, at 11:57, Karthik Nayak wrote:
> Some of the static functions in the `packfile.c` access global
> variables, which can simply be avoiding by passing the `repository`
s/can simply be avoiding/can simply be avoided/
^ permalink raw reply [flat|nested] 184+ messages in thread
* Re: [PATCH v9 03/10] packfile: pass `repository` to static function in the file
2024-11-27 7:44 ` Kristoffer Haugsbakk
@ 2024-11-27 9:09 ` karthik nayak
0 siblings, 0 replies; 184+ messages in thread
From: karthik nayak @ 2024-11-27 9:09 UTC (permalink / raw)
To: Kristoffer Haugsbakk; +Cc: git, Taylor Blau, Jeff King, Junio C Hamano
[-- Attachment #1: Type: text/plain, Size: 401 bytes --]
"Kristoffer Haugsbakk" <kristofferhaugsbakk@fastmail.com> writes:
> On Tue, Nov 26, 2024, at 11:57, Karthik Nayak wrote:
>> Some of the static functions in the `packfile.c` access global
>> variables, which can simply be avoiding by passing the `repository`
>
> s/can simply be avoiding/can simply be avoided/
Indeed, thanks for noticing. Will avoid an immediate re-roll, but have
added it locally.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v9 04/10] packfile: pass down repository to `odb_pack_name`
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (2 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 05/10] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (5 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 6744e18409..420f1566f0 100644
--- a/http.c
+++ b/http.c
@@ -2581,7 +2581,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 05/10] packfile: pass down repository to `has_object[_kept]_pack`
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (3 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 04/10] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 06/10] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (4 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 06/10] packfile: pass down repository to `for_each_packed_object`
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (4 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 05/10] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (3 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index d46cd0e654..cd3bd5bd99 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 07/10] config: make `delta_base_cache_limit` a non-global variable
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (5 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 06/10] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 08/10] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (2 subsequent siblings)
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable independently to the subsystems where it is used.
First, add the setting to the `repo_settings` struct; this provides
access to the config in places where the repository is available. Use
this in `packfile.c`.
In `index-pack.c` we add it to the `pack_idx_option` struct and its
constructor. While the repository struct is available here, it may not
be set because `git index-pack` can be used without a repository.
In `gc.c` add it to the `gc_config` struct and also the constructor
function. The gc functions currently do not have direct access to a
repository struct.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 12 +++++++++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 2 ++
packfile.c | 10 ++++++++--
repo-settings.c | 5 +++++
repo-settings.h | 3 +++
11 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..efb6162fb0 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,11 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ /*
+ * Remove this member from gc_config once repo_settings is passed
+ * through the callchain.
+ */
+ size_t delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +158,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -168,6 +174,7 @@ static void gc_config(struct gc_config *cfg)
{
const char *value;
char *owned = NULL;
+ unsigned long ulongval;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
@@ -206,6 +213,9 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ if (!git_config_get_ulong("core.deltabasecachelimit", &ulongval))
+ cfg->delta_base_cache_limit = ulongval;
+
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
cfg->repack_filter = owned;
@@ -416,7 +426,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..a8da040629 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,8 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+
+ size_t delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..64248ca664 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1496,7 +1496,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1698,6 +1700,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ prepare_repo_settings(p->repo);
+
write_pack_access_log(p, obj_offset);
/* PHASE 1: drill down to the innermost base object */
@@ -1878,7 +1882,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base, base_size,
+ p->repo->settings.delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
diff --git a/repo-settings.c b/repo-settings.c
index 4699b4b365..acc27eb8fe 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -3,6 +3,7 @@
#include "repo-settings.h"
#include "repository.h"
#include "midx.h"
+#include "pack-objects.h"
static void repo_cfg_bool(struct repository *r, const char *key, int *dest,
int def)
@@ -26,6 +27,7 @@ void prepare_repo_settings(struct repository *r)
const char *strval;
int manyfiles;
int read_changed_paths;
+ unsigned long ulongval;
if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository");
@@ -123,6 +125,9 @@ void prepare_repo_settings(struct repository *r)
* removed.
*/
r->settings.command_requires_full_index = 1;
+
+ if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
+ r->settings.delta_base_cache_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 51d6156a11..10a6f7ed64 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -57,12 +57,15 @@ struct repo_settings {
int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
+
+ size_t delta_base_cache_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
.core_untracked_cache = UNTRACKED_CACHE_KEEP, \
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 08/10] config: make `packed_git_(limit|window_size)` non-global variables
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (6 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 09/10] midx: add repository to `multi_pack_index` struct Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 10/10] packfile.c: remove unnecessary prepare_packed_git() call Karthik Nayak
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used
in this file, let's change them from being global config variables to
local variables for the subsystem.
With this, we rid `packfile.c` of all global variable usage and this
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
config.c | 17 -----------------
environment.c | 2 --
packfile.c | 23 +++++++++++++++--------
packfile.h | 2 +-
repo-settings.c | 13 +++++++++++++
repo-settings.h | 4 ++++
7 files changed, 35 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 64248ca664..2e0e28c7de 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -46,15 +45,15 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+void pack_report(struct repository *repo)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(repo->settings.packed_git_window_size),
+ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -650,8 +649,15 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
+ struct repo_settings *settings;
+
+ /* lazy load the settings in case it hasn't been setup */
+ prepare_repo_settings(p->repo);
+ settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
@@ -659,11 +665,12 @@ unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > settings->packed_git_window_size)
+ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
diff --git a/repo-settings.c b/repo-settings.c
index acc27eb8fe..9d16d5399e 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -128,6 +128,19 @@ void prepare_repo_settings(struct repository *r)
if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
r->settings.delta_base_cache_limit = ulongval;
+
+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &ulongval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
+ ulongval /= pgsz_x2;
+ if (ulongval < 1)
+ ulongval = 1;
+ r->settings.packed_git_window_size = ulongval * pgsz_x2;
+ }
+
+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &ulongval))
+ r->settings.packed_git_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 10a6f7ed64..93ea0c3274 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -59,6 +59,8 @@ struct repo_settings {
int warn_ambiguous_refs; /* lazily loaded via accessor */
size_t delta_base_cache_limit;
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
@@ -66,6 +68,8 @@ struct repo_settings {
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
.delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 09/10] midx: add repository to `multi_pack_index` struct
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (7 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 08/10] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
2024-11-26 10:57 ` [PATCH v9 10/10] packfile.c: remove unnecessary prepare_packed_git() call Karthik Nayak
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v9 10/10] packfile.c: remove unnecessary prepare_packed_git() call
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (8 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 09/10] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-11-26 10:57 ` Karthik Nayak
9 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-11-26 10:57 UTC (permalink / raw)
To: karthik.188; +Cc: git, me, peff, gitster
From: Taylor Blau <me@ttaylorr.com>
In 454ea2e4d7 (treewide: use get_all_packs, 2018-08-20) we converted
existing calls to both:
- get_packed_git(), as well as
- the_repository->objects->packed_git
, to instead use the new get_all_packs() function.
In the instance that this commit addresses, there was a preceding call
to prepare_packed_git(), which dates all the way back to 660c889e46
(sha1_file: add for_each iterators for loose and packed objects,
2014-10-15) when its caller (for_each_packed_object()) was first
introduced.
This call could have been removed in 454ea2e4d7, since get_all_packs()
itself calls prepare_packed_git(). But the translation in 454ea2e4d7 was
(to the best of my knowledge) a find-and-replace rather than inspecting
each individual caller.
Having an extra prepare_packed_git() call here is harmless, since it
will notice that we have already set the 'packed_git_initialized' field
and the call will be a noop. So we're only talking about a few dozen CPU
cycles to set up and tear down the stack frame.
But having a lone prepare_packed_git() call immediately before a call to
get_all_packs() confused me, so let's remove it as redundant to avoid
more confusion in the future.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
packfile.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/packfile.c b/packfile.c
index 2e0e28c7de..9c4bd81a8c 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2220,7 +2220,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
int r = 0;
int pack_errors = 0;
- prepare_packed_git(repo);
for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
--
2.47.0
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable
2024-10-21 9:57 [PATCH 00/20] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (28 preceding siblings ...)
2024-11-26 10:57 ` [PATCH v9 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-12-03 14:43 ` Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
` (10 more replies)
29 siblings, 11 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
The `packfile.c` file uses the global variable 'the_repository' extensively
throughout the code. Let's remove all use cases of this, by modifying the
required functions to accept a 'struct repository' instead. This is to clean up
usage of global state.
The first 3 patches are mostly internal to `packfile.c`, we add the repository
field to the `packed_git` struct and this is used to clear up some usages of
the global variables.
The next 3 patches are more disruptive, they modify the function definition of
`odb_pack_name`, `has_object[_kept]_pack` and `for_each_packed_object` to receive
a repository, helping remove other usages of 'the_repository' variable.
Finally, the next two patches deal with global config values. These values are
localized. The last patch removes an unnecessary call to `prepare_packed_git()`.
For v5 onwards, I've rebased the series off the master: 8f8d6eee53 (The
seventh batch, 2024-11-01), as a dependency for this series 'jk/dumb-http-finalize'
was merged to master. I've found no conflicts while merging with seen & next. But
since this series does touch multiple files, there could be future conflicts.
Changes in v10:
- Grammar corrections in the commit messages.
Changes in v9:
- Added a comment in gc_config to indicate that eventually the
`delta_base_cache_limit` variable should be used through repo_settings.
Changes in v8:
- Fix typos in comments
- For packfile.c use delta_base_cache_limit from the repository
settings, this avoids loading the config in hot paths.
- Rename `longval` to `ulongval` to better signify the type.
Changes in v7:
- Cleanup stale commit message.
- Add missing space in `if` statement.
- Fix typo s/incase/in case/.
Changes in v6:
- Lazy load repository settings in packfile.c. This ensures that the settings are
available for sure and we do not rely on callees setting it.
- Use `size_t` for `delta_base_cache_limit`.
Changes in v5:
- Move packed_git* settings to repo_settings to ensure we don't keep reparsing the
settings in `use_pack`.
Changes in v4:
- Renamed the repository field within `packed_git` and `multi_pack_index` from
`r` to `repo`, while keeping function parameters to be `r`.
- Fixed bad braces.
Changes in v3:
- Improved commit messages. In the first commit to talk about how packed_git
struct could also be part of the alternates of a repository. In the 7th commit
to talk about the motive behind removing the global variable.
- Changed 'packed_git->repo' to 'packed_git->r' to keep it consistent with the
rest of the code base.
- Replaced 'the_repository' with locally available access to the repository
struct in multiple regions.
- Removed unnecessary inclusion of the 'repository.h' header file by forward
declaring the 'repository' struct.
- Replace memcpy with hashcpy.
- Change the logic in the 7th patch to use if else statements.
- Added an extra commit to cleanup `pack-bitmap.c`.
Karthik Nayak (9):
packfile: add repository to struct `packed_git`
packfile: use `repository` from `packed_git` directly
packfile: pass `repository` to static function in the file
packfile: pass down repository to `odb_pack_name`
packfile: pass down repository to `has_object[_kept]_pack`
packfile: pass down repository to `for_each_packed_object`
config: make `delta_base_cache_limit` a non-global variable
config: make `packed_git_(limit|window_size)` non-global variables
midx: add repository to `multi_pack_index` struct
Taylor Blau (1):
packfile.c: remove unnecessary prepare_packed_git() call
builtin/cat-file.c | 7 +-
builtin/count-objects.c | 2 +-
builtin/fast-import.c | 15 ++--
builtin/fsck.c | 20 +++---
builtin/gc.c | 12 +++-
builtin/index-pack.c | 20 ++++--
builtin/pack-objects.c | 11 +--
builtin/pack-redundant.c | 2 +-
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 4 +-
config.c | 22 ------
connected.c | 3 +-
diff.c | 3 +-
environment.c | 3 -
environment.h | 1 -
fsck.c | 2 +-
http.c | 4 +-
list-objects.c | 7 +-
midx-write.c | 2 +-
midx.c | 3 +-
midx.h | 3 +
object-store-ll.h | 9 ++-
pack-bitmap.c | 90 ++++++++++++++----------
pack-objects.h | 3 +-
pack-write.c | 1 +
pack.h | 2 +
packfile.c | 144 ++++++++++++++++++++++-----------------
packfile.h | 18 +++--
promisor-remote.c | 2 +-
prune-packed.c | 2 +-
reachable.c | 4 +-
repo-settings.c | 18 +++++
repo-settings.h | 7 ++
revision.c | 13 ++--
tag.c | 2 +-
36 files changed, 275 insertions(+), 190 deletions(-)
Range-diff against v9:
1: d1fdd6996a ! 1: d6d571c58e packfile: add repository to struct `packed_git`
@@ Commit message
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
- We do need to consider that a pack file could be part of the alternates
+ We do need to consider that a packfile could be part of the alternates
of a repository, but considering that we only have one repository struct
- and also that we currently anyways use 'the_repository'. We should be
+ and also that we currently anyways use 'the_repository', we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
2: 65c09858ce = 2: fa69763468 packfile: use `repository` from `packed_git` directly
3: 80632934d1 ! 3: c6acbece46 packfile: pass `repository` to static function in the file
@@ Commit message
packfile: pass `repository` to static function in the file
Some of the static functions in the `packfile.c` access global
- variables, which can simply be avoiding by passing the `repository`
+ variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
4: 67d71eab83 = 4: a8588d6086 packfile: pass down repository to `odb_pack_name`
5: ee210fa153 = 5: b3fe20c8f1 packfile: pass down repository to `has_object[_kept]_pack`
6: 8db7094f4e = 6: ad46b339ea packfile: pass down repository to `for_each_packed_object`
7: a66494384d = 7: 342a26572d config: make `delta_base_cache_limit` a non-global variable
8: bce9196f6b = 8: 6e55daf5b3 config: make `packed_git_(limit|window_size)` non-global variables
9: c7fba8cf6a = 9: 6e0ec955e6 midx: add repository to `multi_pack_index` struct
10: d7f475fbd0 = 10: e33fa2ea0d packfile.c: remove unnecessary prepare_packed_git() call
--
2.47.1
^ permalink raw reply [flat|nested] 184+ messages in thread
* [PATCH v10 01/10] packfile: add repository to struct `packed_git`
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
@ 2024-12-03 14:43 ` Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 02/10] packfile: use `repository` from `packed_git` directly Karthik Nayak
` (9 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster, Taylor Blau
The struct `packed_git` holds information regarding a packed object
file. Let's add the repository variable to this object, to represent the
repository that this packfile belongs to. This helps remove dependency
on the global `the_repository` object in `packfile.c` by simply using
repository information now readily available in the struct.
We do need to consider that a packfile could be part of the alternates
of a repository, but considering that we only have one repository struct
and also that we currently anyways use 'the_repository', we should be
OK with this change.
We also modify `alloc_packed_git` to ensure that the repository is added
to newly created `packed_git` structs. This requires modifying the
function and all its callers to pass the repository object down the
levels.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 3 ++-
builtin/index-pack.c | 6 ++++--
commit-graph.c | 2 +-
connected.c | 3 ++-
http.c | 2 +-
midx-write.c | 2 +-
midx.c | 2 +-
object-store-ll.h | 5 +++++
packfile.c | 15 +++++++++------
packfile.h | 6 ++++--
10 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f14..da7e2d613b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -765,6 +765,7 @@ static void start_packfile(void)
p->pack_fd = pack_fd;
p->do_not_close = 1;
+ p->repo = the_repository;
pack_file = hashfd(pack_fd, p->pack_name);
pack_data = p;
@@ -888,7 +889,7 @@ static void end_packfile(void)
idx_name = keep_pack(create_index());
/* Register the packfile with core git's machinery. */
- new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+ new_p = add_packed_git(pack_data->repo, idx_name, strlen(idx_name), 1);
if (!new_p)
die("core git rejected index %s", idx_name);
all_packs[pack_id] = new_p;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 9d23b41b3a..be2f99625e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1552,7 +1552,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (do_fsck_object) {
struct packed_git *p;
- p = add_packed_git(final_index_name, strlen(final_index_name), 0);
+ p = add_packed_git(the_repository, final_index_name,
+ strlen(final_index_name), 0);
if (p)
install_packed_git(the_repository, p);
}
@@ -1650,7 +1651,8 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
{
- struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+ struct packed_git *p = add_packed_git(the_repository, pack_name,
+ strlen(pack_name), 1);
if (!p)
die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/commit-graph.c b/commit-graph.c
index 5bd89c0acd..83dd69bfeb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1914,7 +1914,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
struct packed_git *p;
strbuf_setlen(&packname, dirlen);
strbuf_addstr(&packname, pack_indexes->items[i].string);
- p = add_packed_git(packname.buf, packname.len, 1);
+ p = add_packed_git(ctx->r, packname.buf, packname.len, 1);
if (!p) {
ret = error(_("error adding pack %s"), packname.buf);
goto cleanup;
diff --git a/connected.c b/connected.c
index a9e2e13995..3099da84f3 100644
--- a/connected.c
+++ b/connected.c
@@ -54,7 +54,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
strbuf_add(&idx_file, transport->pack_lockfiles.items[0].string,
base_len);
strbuf_addstr(&idx_file, ".idx");
- new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
+ new_pack = add_packed_git(the_repository, idx_file.buf,
+ idx_file.len, 1);
strbuf_release(&idx_file);
}
diff --git a/http.c b/http.c
index 58242b9d2d..6744e18409 100644
--- a/http.c
+++ b/http.c
@@ -2439,7 +2439,7 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head,
if (!tmp_idx)
return -1;
- new_pack = parse_pack_index(sha1, tmp_idx);
+ new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
if (!new_pack) {
unlink(tmp_idx);
free(tmp_idx);
diff --git a/midx-write.c b/midx-write.c
index b3a5f6c516..c57726ef94 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -154,7 +154,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
- p = add_packed_git(full_path, full_path_len, 0);
+ p = add_packed_git(the_repository, full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
full_path);
diff --git a/midx.c b/midx.c
index e82d4f2e65..8edb75f51d 100644
--- a/midx.c
+++ b/midx.c
@@ -464,7 +464,7 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m,
strhash(key.buf), key.buf,
struct packed_git, packmap_ent);
if (!p) {
- p = add_packed_git(pack_name.buf, pack_name.len, m->local);
+ p = add_packed_git(r, pack_name.buf, pack_name.len, m->local);
if (p) {
install_packed_git(r, p);
list_add_tail(&p->mru, &r->objects->packed_git_mru);
diff --git a/object-store-ll.h b/object-store-ll.h
index 53b8e693b1..d46cd0e654 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -10,6 +10,7 @@
struct oidmap;
struct oidtree;
struct strbuf;
+struct repository;
struct object_directory {
struct object_directory *next;
@@ -135,6 +136,10 @@ struct packed_git {
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
+
+ /* repo denotes the repository this packfile belongs to */
+ struct repository *repo;
+
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
};
diff --git a/packfile.c b/packfile.c
index 9560f0a33c..6058eddf35 100644
--- a/packfile.c
+++ b/packfile.c
@@ -217,11 +217,12 @@ uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
return ntohl(level1_ofs[value]);
}
-static struct packed_git *alloc_packed_git(int extra)
+static struct packed_git *alloc_packed_git(struct repository *r, int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
+ p->repo = r;
return p;
}
@@ -233,11 +234,12 @@ static char *pack_path_from_idx(const char *idx_path)
return xstrfmt("%.*s.pack", (int)len, idx_path);
}
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path)
{
char *path = pack_path_from_idx(idx_path);
size_t alloc = st_add(strlen(path), 1);
- struct packed_git *p = alloc_packed_git(alloc);
+ struct packed_git *p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
@@ -703,7 +705,8 @@ void unuse_pack(struct pack_window **w_cursor)
}
}
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local)
{
struct stat st;
size_t alloc;
@@ -721,7 +724,7 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".promisor"), 1);
- p = alloc_packed_git(alloc);
+ p = alloc_packed_git(r, alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
@@ -877,7 +880,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
/* Don't reopen a pack we already have. */
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
- p = add_packed_git(full_name, full_name_len, data->local);
+ p = add_packed_git(data->r, full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
diff --git a/packfile.h b/packfile.h
index 08f88a7ff5..aee69d1a0b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -46,7 +46,8 @@ const char *pack_basename(struct packed_git *p);
* and does not add the resulting packed_git struct to the internal list of
* packs. You probably want add_packed_git() instead.
*/
-struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
+struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
+ const char *idx_path);
typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len,
const char *file_name, void *data);
@@ -113,7 +114,8 @@ void close_pack(struct packed_git *);
void close_object_store(struct raw_object_store *o);
void unuse_pack(struct pack_window **);
void clear_delta_base_cache(void);
-struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
+struct packed_git *add_packed_git(struct repository *r, const char *path,
+ size_t path_len, int local);
/*
* Unlink the .pack and associated extension files.
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 02/10] packfile: use `repository` from `packed_git` directly
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
@ 2024-12-03 14:43 ` Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
` (8 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster, Taylor Blau
In the previous commit, we introduced the `repository` structure inside
`packed_git`. This provides an alternative route instead of using the
global `the_repository` variable. Let's modify `packfile.c` now to use
this field wherever possible instead of relying on the global state.
There are still a few instances of `the_repository` usage in the file,
where there is no struct `packed_git` locally available, which will be
fixed in the following commits.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 50 +++++++++++++++++++++++++++-----------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/packfile.c b/packfile.c
index 6058eddf35..5bfa1e17c2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -79,7 +79,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
size_t idx_size;
int fd = git_open(path), ret;
struct stat st;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (fd < 0)
return -1;
@@ -243,7 +243,7 @@ struct packed_git *parse_pack_index(struct repository *r, unsigned char *sha1,
memcpy(p->pack_name, path, alloc); /* includes NUL */
free(path);
- hashcpy(p->hash, sha1, the_repository->hash_algo);
+ hashcpy(p->hash, sha1, p->repo->hash_algo);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
@@ -278,7 +278,7 @@ static int unuse_one_window(struct packed_git *current)
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
- for (p = the_repository->objects->packed_git; p; p = p->next)
+ for (p = current->repo->objects->packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
@@ -540,7 +540,7 @@ static int open_packed_git_1(struct packed_git *p)
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
ssize_t read_result;
- const unsigned hashsz = the_hash_algo->rawsz;
+ const unsigned hashsz = p->repo->hash_algo->rawsz;
if (open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
@@ -597,7 +597,7 @@ static int open_packed_git_1(struct packed_git *p)
if (read_result != hashsz)
return error("packfile %s signature is unavailable", p->pack_name);
idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
- if (!hasheq(hash, idx_hash, the_repository->hash_algo))
+ if (!hasheq(hash, idx_hash, p->repo->hash_algo))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
@@ -637,7 +637,7 @@ unsigned char *use_pack(struct packed_git *p,
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- if (offset > (p->pack_size - the_hash_algo->rawsz))
+ if (offset > (p->pack_size - p->repo->hash_algo->rawsz))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
@@ -711,6 +711,7 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
struct stat st;
size_t alloc;
struct packed_git *p;
+ struct object_id oid;
/*
* Make sure a corresponding .pack file exists and that
@@ -751,9 +752,13 @@ struct packed_git *add_packed_git(struct repository *r, const char *path,
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
- if (path_len < the_hash_algo->hexsz ||
- get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))
- hashclr(p->hash, the_repository->hash_algo);
+ if (path_len < r->hash_algo->hexsz ||
+ get_oid_hex_algop(path + path_len - r->hash_algo->hexsz, &oid,
+ r->hash_algo))
+ hashclr(p->hash, r->hash_algo);
+ else
+ hashcpy(p->hash, oid.hash, r->hash_algo);
+
return p;
}
@@ -1243,9 +1248,9 @@ off_t get_delta_base(struct packed_git *p,
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
struct object_id oid;
- oidread(&oid, base_info, the_repository->hash_algo);
+ oidread(&oid, base_info, p->repo->hash_algo);
base_offset = find_pack_entry_one(&oid, p);
- *curpos += the_hash_algo->rawsz;
+ *curpos += p->repo->hash_algo->rawsz;
} else
die("I am totally screwed");
return base_offset;
@@ -1266,7 +1271,7 @@ static int get_delta_base_oid(struct packed_git *p,
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- oidread(oid, base, the_repository->hash_algo);
+ oidread(oid, base, p->repo->hash_algo);
return 0;
} else if (type == OBJ_OFS_DELTA) {
uint32_t base_pos;
@@ -1608,7 +1613,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
goto out;
}
} else
- oidclr(oi->delta_base_oid, the_repository->hash_algo);
+ oidclr(oi->delta_base_oid, p->repo->hash_algo);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1897,7 +1902,7 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
{
const unsigned char *index_fanout = p->index_data;
const unsigned char *index_lookup;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
int index_lookup_width;
if (!index_fanout)
@@ -1922,7 +1927,7 @@ int nth_packed_object_id(struct object_id *oid,
uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
return -1;
@@ -1933,11 +1938,10 @@ int nth_packed_object_id(struct object_id *oid,
index += 4 * 256;
if (p->index_version == 1) {
oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),
- the_repository->hash_algo);
+ p->repo->hash_algo);
} else {
index += 8;
- oidread(oid, index + st_mult(hashsz, n),
- the_repository->hash_algo);
+ oidread(oid, index + st_mult(hashsz, n), p->repo->hash_algo);
}
return 0;
}
@@ -1959,7 +1963,7 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
- const unsigned int hashsz = the_hash_algo->rawsz;
+ const unsigned int hashsz = p->repo->hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));
@@ -2159,7 +2163,7 @@ int for_each_object_in_pack(struct packed_git *p,
int r = 0;
if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
- if (load_pack_revindex(the_repository, p))
+ if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2227,7 +2231,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
}
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack UNUSED,
+ struct packed_git *pack,
uint32_t pos UNUSED,
void *set_)
{
@@ -2235,12 +2239,12 @@ static int add_promisor_object(const struct object_id *oid,
struct object *obj;
int we_parsed_object;
- obj = lookup_object(the_repository, oid);
+ obj = lookup_object(pack->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object(the_repository, oid);
+ obj = parse_object(pack->repo, oid);
}
if (!obj)
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 03/10] packfile: pass `repository` to static function in the file
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 01/10] packfile: add repository to struct `packed_git` Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 02/10] packfile: use `repository` from `packed_git` directly Karthik Nayak
@ 2024-12-03 14:43 ` Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 04/10] packfile: pass down repository to `odb_pack_name` Karthik Nayak
` (7 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
Some of the static functions in the `packfile.c` access global
variables, which can simply be avoided by passing the `repository`
struct down to them. Let's do that.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
packfile.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/packfile.c b/packfile.c
index 5bfa1e17c2..c96ebc4c69 100644
--- a/packfile.c
+++ b/packfile.c
@@ -460,13 +460,13 @@ static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struc
*accept_windows_inuse = has_windows_inuse;
}
-static int close_one_pack(void)
+static int close_one_pack(struct repository *r)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
- for (p = the_repository->objects->packed_git; p; p = p->next) {
+ for (p = r->objects->packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
@@ -555,7 +555,7 @@ static int open_packed_git_1(struct packed_git *p)
pack_max_fds = 1;
}
- while (pack_max_fds <= pack_open_fds && close_one_pack())
+ while (pack_max_fds <= pack_open_fds && close_one_pack(p->repo))
; /* nothing */
p->pack_fd = git_open(p->pack_name);
@@ -610,7 +610,8 @@ static int open_packed_git(struct packed_git *p)
return -1;
}
-static int in_window(struct pack_window *win, off_t offset)
+static int in_window(struct repository *r, struct pack_window *win,
+ off_t offset)
{
/* We must promise at least one full hash after the
* offset is available from this window, otherwise the offset
@@ -620,7 +621,7 @@ static int in_window(struct pack_window *win, off_t offset)
*/
off_t win_off = win->offset;
return win_off <= offset
- && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
+ && (offset + r->hash_algo->rawsz) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
@@ -642,11 +643,11 @@ unsigned char *use_pack(struct packed_git *p,
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
- if (!win || !in_window(win, offset)) {
+ if (!win || !in_window(p->repo, win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
- if (in_window(win, offset))
+ if (in_window(p->repo, win, offset))
break;
}
if (!win) {
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 04/10] packfile: pass down repository to `odb_pack_name`
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (2 preceding siblings ...)
2024-12-03 14:43 ` [PATCH v10 03/10] packfile: pass `repository` to static function in the file Karthik Nayak
@ 2024-12-03 14:43 ` Karthik Nayak
2024-12-03 14:43 ` [PATCH v10 05/10] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
` (6 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
The function `odb_pack_name` currently relies on the global variable
`the_repository`. To eliminate global variable usage in `packfile.c`, we
should progressively shift the dependency on the_repository to higher
layers.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 8 ++++----
builtin/index-pack.c | 4 ++--
builtin/pack-redundant.c | 2 +-
http.c | 2 +-
packfile.c | 9 ++++-----
packfile.h | 3 ++-
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index da7e2d613b..3ccc4c5722 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -806,7 +806,7 @@ static char *keep_pack(const char *curr_index_name)
struct strbuf name = STRBUF_INIT;
int keep_fd;
- odb_pack_name(&name, pack_data->hash, "keep");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
keep_fd = odb_pack_keep(name.buf);
if (keep_fd < 0)
die_errno("cannot create keep file");
@@ -814,11 +814,11 @@ static char *keep_pack(const char *curr_index_name)
if (close(keep_fd))
die_errno("failed to write keep file");
- odb_pack_name(&name, pack_data->hash, "pack");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
if (finalize_object_file(pack_data->pack_name, name.buf))
die("cannot store pack file");
- odb_pack_name(&name, pack_data->hash, "idx");
+ odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
if (finalize_object_file(curr_index_name, name.buf))
die("cannot store index file");
free((void *)curr_index_name);
@@ -832,7 +832,7 @@ static void unkeep_all_packs(void)
for (k = 0; k < pack_id; k++) {
struct packed_git *p = all_packs[k];
- odb_pack_name(&name, p->hash, "keep");
+ odb_pack_name(p->repo, &name, p->hash, "keep");
unlink_or_warn(name.buf);
}
strbuf_release(&name);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index be2f99625e..eaefb41761 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1479,7 +1479,7 @@ static void write_special_file(const char *suffix, const char *msg,
if (pack_name)
filename = derive_filename(pack_name, "pack", suffix, &name_buf);
else
- filename = odb_pack_name(&name_buf, hash, suffix);
+ filename = odb_pack_name(the_repository, &name_buf, hash, suffix);
fd = odb_pack_keep(filename);
if (fd < 0) {
@@ -1507,7 +1507,7 @@ static void rename_tmp_packfile(const char **final_name,
{
if (!*final_name || strcmp(*final_name, curr_name)) {
if (!*final_name)
- *final_name = odb_pack_name(name, hash, ext);
+ *final_name = odb_pack_name(the_repository, name, hash, ext);
if (finalize_object_file(curr_name, *final_name))
die(_("unable to rename temporary '*.%s' file to '%s'"),
ext, *final_name);
diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index d2c1c4e5ec..bc61990a93 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -690,7 +690,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED, s
pl = red = pack_list_difference(local_packs, min);
while (pl) {
printf("%s\n%s\n",
- odb_pack_name(&idx_name, pl->pack->hash, "idx"),
+ odb_pack_name(pl->pack->repo, &idx_name, pl->pack->hash, "idx"),
pl->pack->pack_name);
pl = pl->next;
}
diff --git a/http.c b/http.c
index 6744e18409..420f1566f0 100644
--- a/http.c
+++ b/http.c
@@ -2581,7 +2581,7 @@ struct http_pack_request *new_direct_http_pack_request(
preq->url = url;
- odb_pack_name(&preq->tmpfile, packed_git_hash, "pack");
+ odb_pack_name(the_repository, &preq->tmpfile, packed_git_hash, "pack");
strbuf_addstr(&preq->tmpfile, ".temp");
preq->packfile = fopen(preq->tmpfile.buf, "a");
if (!preq->packfile) {
diff --git a/packfile.c b/packfile.c
index c96ebc4c69..1015dac6db 100644
--- a/packfile.c
+++ b/packfile.c
@@ -25,13 +25,12 @@
#include "pack-revindex.h"
#include "promisor-remote.h"
-char *odb_pack_name(struct strbuf *buf,
- const unsigned char *hash,
- const char *ext)
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext)
{
strbuf_reset(buf);
- strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(the_repository),
- hash_to_hex(hash), ext);
+ strbuf_addf(buf, "%s/pack/pack-%s.%s", repo_get_object_directory(r),
+ hash_to_hex_algop(hash, r->hash_algo), ext);
return buf->buf;
}
diff --git a/packfile.h b/packfile.h
index aee69d1a0b..51187f2393 100644
--- a/packfile.h
+++ b/packfile.h
@@ -29,7 +29,8 @@ struct pack_entry {
*
* Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx"
*/
-char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext);
+char *odb_pack_name(struct repository *r, struct strbuf *buf,
+ const unsigned char *hash, const char *ext);
/*
* Return the basename of the packfile, omitting any containing directory
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 05/10] packfile: pass down repository to `has_object[_kept]_pack`
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (3 preceding siblings ...)
2024-12-03 14:43 ` [PATCH v10 04/10] packfile: pass down repository to `odb_pack_name` Karthik Nayak
@ 2024-12-03 14:43 ` Karthik Nayak
2024-12-03 14:44 ` [PATCH v10 06/10] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
` (5 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:43 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
The functions `has_object[_kept]_pack` currently rely on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from these
functions and any related ones.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/count-objects.c | 2 +-
builtin/fsck.c | 2 +-
builtin/pack-objects.c | 4 ++--
diff.c | 3 ++-
list-objects.c | 3 ++-
pack-bitmap.c | 2 +-
packfile.c | 9 +++++----
packfile.h | 5 +++--
prune-packed.c | 2 +-
reachable.c | 2 +-
revision.c | 4 ++--
11 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 04d80887e0..1e89148ed7 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -67,7 +67,7 @@ static int count_loose(const struct object_id *oid, const char *path,
else {
loose_size += on_disk_bytes(st);
loose++;
- if (verbose && has_object_pack(oid))
+ if (verbose && has_object_pack(the_repository, oid))
packed_loose++;
}
return 0;
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 7f4e2f0414..bb56eb98ac 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -272,7 +272,7 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (is_promisor_object(&obj->oid))
return;
- if (has_object_pack(&obj->oid))
+ if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
printf_ln(_("missing %s %s"),
printable_type(&obj->oid, obj->type),
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0800714267..0f32e92a3a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1529,7 +1529,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
- if (has_object_kept_pack(oid, flags))
+ if (has_object_kept_pack(p->repo, oid, flags))
return 0;
}
@@ -3627,7 +3627,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
{
- return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
+ return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
}
static int cruft_include_check(struct commit *commit, void *data)
diff --git a/diff.c b/diff.c
index dceac20d18..266ddf18e7 100644
--- a/diff.c
+++ b/diff.c
@@ -4041,7 +4041,8 @@ static int reuse_worktree_file(struct index_state *istate,
* objects however would tend to be slower as they need
* to be individually opened and inflated.
*/
- if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid))
+ if (!FAST_WORKING_DIRECTORY && !want_file &&
+ has_object_pack(istate->repo, oid))
return 0;
/*
diff --git a/list-objects.c b/list-objects.c
index 985d008799..31236a8dc9 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -41,7 +41,8 @@ static void show_object(struct traversal_context *ctx,
{
if (!ctx->show_object)
return;
- if (ctx->revs->unpacked && has_object_pack(&object->oid))
+ if (ctx->revs->unpacked && has_object_pack(ctx->revs->repo,
+ &object->oid))
return;
ctx->show_object(object, name, ctx->show_data);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4fa9dfc771..d34ba9909a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1889,7 +1889,7 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(&eindex->objects[i]->oid))
+ if (has_object_pack(the_repository, &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
diff --git a/packfile.c b/packfile.c
index 1015dac6db..e7dd270217 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2143,16 +2143,17 @@ int find_kept_pack_entry(struct repository *r,
return 0;
}
-int has_object_pack(const struct object_id *oid)
+int has_object_pack(struct repository *r, const struct object_id *oid)
{
struct pack_entry e;
- return find_pack_entry(the_repository, oid, &e);
+ return find_pack_entry(r, oid, &e);
}
-int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags)
{
struct pack_entry e;
- return find_kept_pack_entry(the_repository, oid, flags, &e);
+ return find_kept_pack_entry(r, oid, flags, &e);
}
int for_each_object_in_pack(struct packed_git *p,
diff --git a/packfile.h b/packfile.h
index 51187f2393..b09fb2c530 100644
--- a/packfile.h
+++ b/packfile.h
@@ -193,8 +193,9 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
-int has_object_pack(const struct object_id *oid);
-int has_object_kept_pack(const struct object_id *oid, unsigned flags);
+int has_object_pack(struct repository *r, const struct object_id *oid);
+int has_object_kept_pack(struct repository *r, const struct object_id *oid,
+ unsigned flags);
/*
* Return 1 if an object in a promisor packfile is or refers to the given
diff --git a/prune-packed.c b/prune-packed.c
index 2bb99c29df..d1c65ab10e 100644
--- a/prune-packed.c
+++ b/prune-packed.c
@@ -24,7 +24,7 @@ static int prune_object(const struct object_id *oid, const char *path,
{
int *opts = data;
- if (!has_object_pack(oid))
+ if (!has_object_pack(the_repository, oid))
return 0;
if (*opts & PRUNE_PACKED_DRY_RUN)
diff --git a/reachable.c b/reachable.c
index 3e9b3dd0a4..09d2c50079 100644
--- a/reachable.c
+++ b/reachable.c
@@ -239,7 +239,7 @@ static int want_recent_object(struct recent_data *data,
const struct object_id *oid)
{
if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
+ has_object_kept_pack(data->revs->repo, oid, IN_CORE_KEEP_PACKS))
return 0;
return 1;
}
diff --git a/revision.c b/revision.c
index f5f5b84f2b..d1d152a67b 100644
--- a/revision.c
+++ b/revision.c
@@ -4103,10 +4103,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
{
if (commit->object.flags & SHOWN)
return commit_ignore;
- if (revs->unpacked && has_object_pack(&commit->object.oid))
+ if (revs->unpacked && has_object_pack(revs->repo, &commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
- if (has_object_kept_pack(&commit->object.oid,
+ if (has_object_kept_pack(revs->repo, &commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 06/10] packfile: pass down repository to `for_each_packed_object`
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (4 preceding siblings ...)
2024-12-03 14:43 ` [PATCH v10 05/10] packfile: pass down repository to `has_object[_kept]_pack` Karthik Nayak
@ 2024-12-03 14:44 ` Karthik Nayak
2024-12-03 14:44 ` [PATCH v10 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
` (4 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:44 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
The function `for_each_packed_object` currently relies on the global
variable `the_repository`. To eliminate global variable usage in
`packfile.c`, we should progressively shift the dependency on
the_repository to higher layers. Let's remove its usage from this
function and the closely related function `is_promisor_object`.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/cat-file.c | 7 ++++---
builtin/fsck.c | 18 +++++++++++-------
builtin/pack-objects.c | 7 +++++--
builtin/repack.c | 2 +-
builtin/rev-list.c | 2 +-
commit-graph.c | 2 +-
fsck.c | 2 +-
list-objects.c | 4 ++--
object-store-ll.h | 4 ++--
packfile.c | 14 +++++++-------
packfile.h | 2 +-
promisor-remote.c | 2 +-
reachable.c | 2 +-
revision.c | 9 +++++----
tag.c | 2 +-
15 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index bfdfb51c7c..d67b101c20 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -827,15 +827,16 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
for_each_loose_object(batch_unordered_loose, &cb, 0);
- for_each_packed_object(batch_unordered_packed, &cb,
- FOR_EACH_OBJECT_PACK_ORDER);
+ for_each_packed_object(the_repository, batch_unordered_packed,
+ &cb, FOR_EACH_OBJECT_PACK_ORDER);
oidset_clear(&seen);
} else {
struct oid_array sa = OID_ARRAY_INIT;
for_each_loose_object(collect_loose_object, &sa, 0);
- for_each_packed_object(collect_packed_object, &sa, 0);
+ for_each_packed_object(the_repository, collect_packed_object,
+ &sa, 0);
oid_array_for_each_unique(&sa, batch_object_cb, &cb);
diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb56eb98ac..0196c54eb6 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -150,7 +150,7 @@ static int mark_object(struct object *obj, enum object_type type,
return 0;
obj->flags |= REACHABLE;
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
/*
* Further recursion does not need to be performed on this
* object since it is a promisor object (so it does not need to
@@ -270,7 +270,7 @@ static void check_reachable_object(struct object *obj)
* do a full fsck
*/
if (!(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
if (has_object_pack(the_repository, &obj->oid))
return; /* it is in pack - forget about it */
@@ -391,7 +391,10 @@ static void check_connectivity(void)
* traversal.
*/
for_each_loose_object(mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(mark_packed_unreachable_referents, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_unreachable_referents,
+ NULL,
+ 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -488,7 +491,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
refname, timestamp);
obj->flags |= USED;
mark_object_reachable(obj);
- } else if (!is_promisor_object(oid)) {
+ } else if (!is_promisor_object(the_repository, oid)) {
error(_("%s: invalid reflog entry %s"),
refname, oid_to_hex(oid));
errors_found |= ERROR_REACHABLE;
@@ -531,7 +534,7 @@ static int fsck_handle_ref(const char *refname, const char *referent UNUSED, con
obj = parse_object(the_repository, oid);
if (!obj) {
- if (is_promisor_object(oid)) {
+ if (is_promisor_object(the_repository, oid)) {
/*
* Increment default_refs anyway, because this is a
* valid ref.
@@ -966,7 +969,8 @@ int cmd_fsck(int argc,
if (connectivity_only) {
for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ for_each_packed_object(the_repository,
+ mark_packed_for_connectivity, NULL, 0);
} else {
prepare_alt_odb(the_repository);
for (odb = the_repository->objects->odb; odb; odb = odb->next)
@@ -1011,7 +1015,7 @@ int cmd_fsck(int argc,
&oid);
if (!obj || !(obj->flags & HAS_OBJ)) {
- if (is_promisor_object(&oid))
+ if (is_promisor_object(the_repository, &oid))
continue;
error(_("%s: object missing"), oid_to_hex(&oid));
errors_found |= ERROR_OBJECT;
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0f32e92a3a..db20f0cf51 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3858,7 +3858,8 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name,
* Quietly ignore EXPECTED missing objects. This avoids problems with
* staging them now and getting an odd error later.
*/
- if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
+ if (!has_object(the_repository, &obj->oid, 0) &&
+ is_promisor_object(to_pack.repo, &obj->oid))
return;
show_object(obj, name, data);
@@ -3927,7 +3928,9 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
+ if (for_each_packed_object(to_pack.repo,
+ add_object_in_unpacked_pack,
+ NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
diff --git a/builtin/repack.c b/builtin/repack.c
index d6bb37e84a..96a4fa234b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -404,7 +404,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* {type -> existing pack order} ordering when computing deltas instead
* of a {type -> size} ordering, which may produce better deltas.
*/
- for_each_packed_object(write_oid, &cmd,
+ for_each_packed_object(the_repository, write_oid, &cmd,
FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index f62bcbf2b1..43c42621e3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -121,7 +121,7 @@ static inline void finish_object__ma(struct object *obj)
return;
case MA_ALLOW_PROMISOR:
- if (is_promisor_object(&obj->oid))
+ if (is_promisor_object(the_repository, &obj->oid))
return;
die("unexpected missing %s object '%s'",
type_name(obj->type), oid_to_hex(&obj->oid));
diff --git a/commit-graph.c b/commit-graph.c
index 83dd69bfeb..e2e2083951 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1960,7 +1960,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
ctx->progress = start_delayed_progress(
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(add_packed_commits, ctx,
+ for_each_packed_object(ctx->r, add_packed_commits, ctx,
FOR_EACH_OBJECT_PACK_ORDER);
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
diff --git a/fsck.c b/fsck.c
index 3756f52459..87ce999a49 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1295,7 +1295,7 @@ static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
- if (is_promisor_object(oid))
+ if (is_promisor_object(the_repository, oid))
continue;
ret |= report(options,
oid, OBJ_BLOB, msg_missing,
diff --git a/list-objects.c b/list-objects.c
index 31236a8dc9..d11a389b3a 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -75,7 +75,7 @@ static void process_blob(struct traversal_context *ctx,
*/
if (ctx->revs->exclude_promisor_objects &&
!repo_has_object_file(the_repository, &obj->oid) &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(ctx->revs->repo, &obj->oid))
return;
pathlen = path->len;
@@ -180,7 +180,7 @@ static void process_tree(struct traversal_context *ctx,
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
- is_promisor_object(&obj->oid))
+ is_promisor_object(revs->repo, &obj->oid))
return;
if (!revs->do_not_die_on_missing_objects)
diff --git a/object-store-ll.h b/object-store-ll.h
index d46cd0e654..cd3bd5bd99 100644
--- a/object-store-ll.h
+++ b/object-store-ll.h
@@ -550,7 +550,7 @@ typedef int each_packed_object_fn(const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
enum for_each_object_flags flags);
-int for_each_packed_object(each_packed_object_fn, void *,
- enum for_each_object_flags flags);
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags);
#endif /* OBJECT_STORE_LL_H */
diff --git a/packfile.c b/packfile.c
index e7dd270217..5e8019b1fe 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2200,15 +2200,15 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
+ void *data, enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
- prepare_packed_git(the_repository);
- for (p = get_all_packs(the_repository); p; p = p->next) {
+ prepare_packed_git(repo);
+ for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
@@ -2286,14 +2286,14 @@ static int add_promisor_object(const struct object_id *oid,
return 0;
}
-int is_promisor_object(const struct object_id *oid)
+int is_promisor_object(struct repository *r, const struct object_id *oid)
{
static struct oidset promisor_objects;
static int promisor_objects_prepared;
if (!promisor_objects_prepared) {
- if (repo_has_promisor_remote(the_repository)) {
- for_each_packed_object(add_promisor_object,
+ if (repo_has_promisor_remote(r)) {
+ for_each_packed_object(r, add_promisor_object,
&promisor_objects,
FOR_EACH_OBJECT_PROMISOR_ONLY |
FOR_EACH_OBJECT_PACK_ORDER);
diff --git a/packfile.h b/packfile.h
index b09fb2c530..addb95b0c4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -201,7 +201,7 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
-int is_promisor_object(const struct object_id *oid);
+int is_promisor_object(struct repository *r, const struct object_id *oid);
/*
* Expose a function for fuzz testing.
diff --git a/promisor-remote.c b/promisor-remote.c
index 9345ae3db2..c714f4f007 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -283,7 +283,7 @@ void promisor_remote_get_direct(struct repository *repo,
}
for (i = 0; i < remaining_nr; i++) {
- if (is_promisor_object(&remaining_oids[i]))
+ if (is_promisor_object(repo, &remaining_oids[i]))
die(_("could not fetch %s from promisor remote"),
oid_to_hex(&remaining_oids[i]));
}
diff --git a/reachable.c b/reachable.c
index 09d2c50079..ecf7ccf504 100644
--- a/reachable.c
+++ b/reachable.c
@@ -324,7 +324,7 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
if (ignore_in_core_kept_packs)
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(add_recent_packed, &data, flags);
+ r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/revision.c b/revision.c
index d1d152a67b..45dc6d2819 100644
--- a/revision.c
+++ b/revision.c
@@ -390,7 +390,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
if (!object) {
if (revs->ignore_missing)
return NULL;
- if (revs->exclude_promisor_objects && is_promisor_object(oid))
+ if (revs->exclude_promisor_objects &&
+ is_promisor_object(revs->repo, oid))
return NULL;
if (revs->do_not_die_on_missing_objects) {
oidset_insert(&revs->missing_commits, oid);
@@ -432,7 +433,7 @@ static struct commit *handle_commit(struct rev_info *revs,
if (revs->ignore_missing_links || (flags & UNINTERESTING))
return NULL;
if (revs->exclude_promisor_objects &&
- is_promisor_object(&tag->tagged->oid))
+ is_promisor_object(revs->repo, &tag->tagged->oid))
return NULL;
if (revs->do_not_die_on_missing_objects && oid) {
oidset_insert(&revs->missing_commits, oid);
@@ -1211,7 +1212,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
revs->do_not_die_on_missing_objects;
if (repo_parse_commit_gently(revs->repo, p, gently) < 0) {
if (revs->exclude_promisor_objects &&
- is_promisor_object(&p->object.oid)) {
+ is_promisor_object(revs->repo, &p->object.oid)) {
if (revs->first_parent_only)
break;
continue;
@@ -3915,7 +3916,7 @@ int prepare_revision_walk(struct rev_info *revs)
revs->treesame.name = "treesame";
if (revs->exclude_promisor_objects) {
- for_each_packed_object(mark_uninteresting, revs,
+ for_each_packed_object(revs->repo, mark_uninteresting, revs,
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
diff --git a/tag.c b/tag.c
index d24170e340..beef9571b5 100644
--- a/tag.c
+++ b/tag.c
@@ -84,7 +84,7 @@ struct object *deref_tag(struct repository *r, struct object *o, const char *war
o = NULL;
}
if (!o && warn) {
- if (last_oid && is_promisor_object(last_oid))
+ if (last_oid && is_promisor_object(r, last_oid))
return NULL;
if (!warnlen)
warnlen = strlen(warn);
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 07/10] config: make `delta_base_cache_limit` a non-global variable
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (5 preceding siblings ...)
2024-12-03 14:44 ` [PATCH v10 06/10] packfile: pass down repository to `for_each_packed_object` Karthik Nayak
@ 2024-12-03 14:44 ` Karthik Nayak
2024-12-03 14:44 ` [PATCH v10 08/10] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
` (3 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:44 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
The `delta_base_cache_limit` variable is a global config variable used
by multiple subsystems. Let's make this non-global, by adding this
variable independently to the subsystems where it is used.
First, add the setting to the `repo_settings` struct; this provides
access to the config in places where the repository is available. Use
this in `packfile.c`.
In `index-pack.c` we add it to the `pack_idx_option` struct and its
constructor. While the repository struct is available here, it may not
be set because `git index-pack` can be used without a repository.
In `gc.c` add it to the `gc_config` struct and also the constructor
function. The gc functions currently do not have direct access to a
repository struct.
These changes are made to remove the usage of `delta_base_cache_limit`
as a global variable in `packfile.c`. This brings us one step closer to
removing the `USE_THE_REPOSITORY_VARIABLE` definition in `packfile.c`,
which we complete in the next patch.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/gc.c | 12 +++++++++++-
builtin/index-pack.c | 10 +++++++---
config.c | 5 -----
environment.c | 1 -
environment.h | 1 -
pack-objects.h | 3 ++-
pack-write.c | 1 +
pack.h | 2 ++
packfile.c | 10 ++++++++--
repo-settings.c | 5 +++++
repo-settings.h | 3 +++
11 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/builtin/gc.c b/builtin/gc.c
index d52735354c..efb6162fb0 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -138,6 +138,11 @@ struct gc_config {
char *repack_filter_to;
unsigned long big_pack_threshold;
unsigned long max_delta_cache_size;
+ /*
+ * Remove this member from gc_config once repo_settings is passed
+ * through the callchain.
+ */
+ size_t delta_base_cache_limit;
};
#define GC_CONFIG_INIT { \
@@ -153,6 +158,7 @@ struct gc_config {
.prune_expire = xstrdup("2.weeks.ago"), \
.prune_worktrees_expire = xstrdup("3.months.ago"), \
.max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
static void gc_config_release(struct gc_config *cfg)
@@ -168,6 +174,7 @@ static void gc_config(struct gc_config *cfg)
{
const char *value;
char *owned = NULL;
+ unsigned long ulongval;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
@@ -206,6 +213,9 @@ static void gc_config(struct gc_config *cfg)
git_config_get_ulong("gc.bigpackthreshold", &cfg->big_pack_threshold);
git_config_get_ulong("pack.deltacachesize", &cfg->max_delta_cache_size);
+ if (!git_config_get_ulong("core.deltabasecachelimit", &ulongval))
+ cfg->delta_base_cache_limit = ulongval;
+
if (!git_config_get_string("gc.repackfilter", &owned)) {
free(cfg->repack_filter);
cfg->repack_filter = owned;
@@ -416,7 +426,7 @@ static uint64_t estimate_repack_memory(struct gc_config *cfg,
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
*/
- heap += delta_base_cache_limit;
+ heap += cfg->delta_base_cache_limit;
/* and of course pack-objects has its own delta cache */
heap += cfg->max_delta_cache_size;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index eaefb41761..23bfa45403 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1238,7 +1238,7 @@ static void parse_pack_objects(unsigned char *hash)
* recursively checking if the resulting object is used as a base
* for some more deltas.
*/
-static void resolve_deltas(void)
+static void resolve_deltas(struct pack_idx_option *opts)
{
int i;
@@ -1254,7 +1254,7 @@ static void resolve_deltas(void)
nr_ref_deltas + nr_ofs_deltas);
nr_dispatched = 0;
- base_cache_limit = delta_base_cache_limit * nr_threads;
+ base_cache_limit = opts->delta_base_cache_limit * nr_threads;
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
init_thread();
work_lock();
@@ -1604,6 +1604,10 @@ static int git_index_pack_config(const char *k, const char *v,
else
opts->flags &= ~WRITE_REV;
}
+ if (!strcmp(k, "core.deltabasecachelimit")) {
+ opts->delta_base_cache_limit = git_config_ulong(k, v, ctx->kvi);
+ return 0;
+ }
return git_default_config(k, v, ctx, cb);
}
@@ -1930,7 +1934,7 @@ int cmd_index_pack(int argc,
parse_pack_objects(pack_hash);
if (report_end_of_input)
write_in_full(2, "\0", 1);
- resolve_deltas();
+ resolve_deltas(&opts);
conclude_pack(fix_thin_pack, curr_pack, pack_hash);
free(ofs_deltas);
free(ref_deltas);
diff --git a/config.c b/config.c
index a11bb85da3..728ef98e42 100644
--- a/config.c
+++ b/config.c
@@ -1515,11 +1515,6 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.deltabasecachelimit")) {
- delta_base_cache_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index a2ce998081..8e5022c282 100644
--- a/environment.c
+++ b/environment.c
@@ -51,7 +51,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
-size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/environment.h b/environment.h
index 923e12661e..2f43340f0b 100644
--- a/environment.h
+++ b/environment.h
@@ -165,7 +165,6 @@ extern int zlib_compression_level;
extern int pack_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
-extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
extern unsigned long pack_size_limit_cfg;
extern int max_allowed_tree_depth;
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64..3f6f504203 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,7 +7,8 @@
struct repository;
-#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
+#define DEFAULT_DELTA_BASE_CACHE_LIMIT (96 * 1024 * 1024)
#define OE_DFS_STATE_BITS 2
#define OE_DEPTH_BITS 12
diff --git a/pack-write.c b/pack-write.c
index 8c7dfddc5a..98a8c0e785 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -21,6 +21,7 @@ void reset_pack_idx_option(struct pack_idx_option *opts)
memset(opts, 0, sizeof(*opts));
opts->version = 2;
opts->off32_limit = 0x7fffffff;
+ opts->delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT;
}
static int sha1_compare(const void *_a, const void *_b)
diff --git a/pack.h b/pack.h
index 02bbdfb19c..a8da040629 100644
--- a/pack.h
+++ b/pack.h
@@ -58,6 +58,8 @@ struct pack_idx_option {
*/
int anomaly_alloc, anomaly_nr;
uint32_t *anomaly;
+
+ size_t delta_base_cache_limit;
};
void reset_pack_idx_option(struct pack_idx_option *);
diff --git a/packfile.c b/packfile.c
index 5e8019b1fe..64248ca664 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1496,7 +1496,9 @@ void clear_delta_base_cache(void)
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
- void *base, unsigned long base_size, enum object_type type)
+ void *base, unsigned long base_size,
+ unsigned long delta_base_cache_limit,
+ enum object_type type)
{
struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
@@ -1698,6 +1700,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
+ prepare_repo_settings(p->repo);
+
write_pack_access_log(p, obj_offset);
/* PHASE 1: drill down to the innermost base object */
@@ -1878,7 +1882,9 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* before we are done using it.
*/
if (!external_base)
- add_delta_base_cache(p, base_obj_offset, base, base_size, type);
+ add_delta_base_cache(p, base_obj_offset, base, base_size,
+ p->repo->settings.delta_base_cache_limit,
+ type);
free(delta_data);
free(external_base);
diff --git a/repo-settings.c b/repo-settings.c
index 4699b4b365..acc27eb8fe 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -3,6 +3,7 @@
#include "repo-settings.h"
#include "repository.h"
#include "midx.h"
+#include "pack-objects.h"
static void repo_cfg_bool(struct repository *r, const char *key, int *dest,
int def)
@@ -26,6 +27,7 @@ void prepare_repo_settings(struct repository *r)
const char *strval;
int manyfiles;
int read_changed_paths;
+ unsigned long ulongval;
if (!r->gitdir)
BUG("Cannot add settings for uninitialized repository");
@@ -123,6 +125,9 @@ void prepare_repo_settings(struct repository *r)
* removed.
*/
r->settings.command_requires_full_index = 1;
+
+ if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
+ r->settings.delta_base_cache_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 51d6156a11..10a6f7ed64 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -57,12 +57,15 @@ struct repo_settings {
int core_multi_pack_index;
int warn_ambiguous_refs; /* lazily loaded via accessor */
+
+ size_t delta_base_cache_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
.core_untracked_cache = UNTRACKED_CACHE_KEEP, \
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
+ .delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 08/10] config: make `packed_git_(limit|window_size)` non-global variables
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (6 preceding siblings ...)
2024-12-03 14:44 ` [PATCH v10 07/10] config: make `delta_base_cache_limit` a non-global variable Karthik Nayak
@ 2024-12-03 14:44 ` Karthik Nayak
2024-12-03 14:44 ` [PATCH v10 09/10] midx: add repository to `multi_pack_index` struct Karthik Nayak
` (2 subsequent siblings)
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:44 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster, Taylor Blau
The variables `packed_git_window_size` and `packed_git_limit` are global
config variables used in the `packfile.c` file. Since they are only used in
this file, let's change them from being global config variables to
local variables for the subsystem.
With this, we rid `packfile.c` of all global variable usage and this
means we can also remove the `USE_THE_REPOSITORY_VARIABLE` guard from
the file.
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
builtin/fast-import.c | 4 ++--
config.c | 17 -----------------
environment.c | 2 --
packfile.c | 23 +++++++++++++++--------
packfile.h | 2 +-
repo-settings.c | 13 +++++++++++++
repo-settings.h | 4 ++++
7 files changed, 35 insertions(+), 30 deletions(-)
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 3ccc4c5722..0ece070260 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -3539,7 +3539,7 @@ static void parse_argv(void)
int cmd_fast_import(int argc,
const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
unsigned int i;
@@ -3660,7 +3660,7 @@ int cmd_fast_import(int argc,
fprintf(stderr, " pools: %10lu KiB\n", (unsigned long)((tree_entry_allocd + fi_mem_pool.pool_alloc) /1024));
fprintf(stderr, " objects: %10" PRIuMAX " KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
- pack_report();
+ pack_report(repo);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "\n");
}
diff --git a/config.c b/config.c
index 728ef98e42..2c295f7430 100644
--- a/config.c
+++ b/config.c
@@ -1493,28 +1493,11 @@ static int git_default_core_config(const char *var, const char *value,
return 0;
}
- if (!strcmp(var, "core.packedgitwindowsize")) {
- int pgsz_x2 = getpagesize() * 2;
- packed_git_window_size = git_config_ulong(var, value, ctx->kvi);
-
- /* This value must be multiple of (pagesize * 2) */
- packed_git_window_size /= pgsz_x2;
- if (packed_git_window_size < 1)
- packed_git_window_size = 1;
- packed_git_window_size *= pgsz_x2;
- return 0;
- }
-
if (!strcmp(var, "core.bigfilethreshold")) {
big_file_threshold = git_config_ulong(var, value, ctx->kvi);
return 0;
}
- if (!strcmp(var, "core.packedgitlimit")) {
- packed_git_limit = git_config_ulong(var, value, ctx->kvi);
- return 0;
- }
-
if (!strcmp(var, "core.autocrlf")) {
if (value && !strcasecmp(value, "input")) {
auto_crlf = AUTO_CRLF_INPUT;
diff --git a/environment.c b/environment.c
index 8e5022c282..8389a27270 100644
--- a/environment.c
+++ b/environment.c
@@ -49,8 +49,6 @@ int fsync_object_files = -1;
int use_fsync = -1;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
-size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
-size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
unsigned long big_file_threshold = 512 * 1024 * 1024;
char *editor_program;
char *askpass_program;
diff --git a/packfile.c b/packfile.c
index 64248ca664..2e0e28c7de 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1,4 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "environment.h"
@@ -46,15 +45,15 @@ static size_t pack_mapped;
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
-void pack_report(void)
+void pack_report(struct repository *repo)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
- sz_fmt(packed_git_window_size),
- sz_fmt(packed_git_limit));
+ sz_fmt(repo->settings.packed_git_window_size),
+ sz_fmt(repo->settings.packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
@@ -650,8 +649,15 @@ unsigned char *use_pack(struct packed_git *p,
break;
}
if (!win) {
- size_t window_align = packed_git_window_size / 2;
+ size_t window_align;
off_t len;
+ struct repo_settings *settings;
+
+ /* lazy load the settings in case it hasn't been setup */
+ prepare_repo_settings(p->repo);
+ settings = &p->repo->settings;
+
+ window_align = settings->packed_git_window_size / 2;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
@@ -659,11 +665,12 @@ unsigned char *use_pack(struct packed_git *p,
CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
- if (len > packed_git_window_size)
- len = packed_git_window_size;
+ if (len > settings->packed_git_window_size)
+ len = settings->packed_git_window_size;
win->len = (size_t)len;
pack_mapped += win->len;
- while (packed_git_limit < pack_mapped
+
+ while (settings->packed_git_limit < pack_mapped
&& unuse_one_window(p))
; /* nothing */
win->base = xmmap_gently(NULL, win->len,
diff --git a/packfile.h b/packfile.h
index addb95b0c4..58104fa009 100644
--- a/packfile.h
+++ b/packfile.h
@@ -89,7 +89,7 @@ unsigned long repo_approximate_object_count(struct repository *r);
struct packed_git *find_oid_pack(const struct object_id *oid,
struct packed_git *packs);
-void pack_report(void);
+void pack_report(struct repository *repo);
/*
* mmap the index file for the specified packfile (if it is not
diff --git a/repo-settings.c b/repo-settings.c
index acc27eb8fe..9d16d5399e 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -128,6 +128,19 @@ void prepare_repo_settings(struct repository *r)
if (!repo_config_get_ulong(r, "core.deltabasecachelimit", &ulongval))
r->settings.delta_base_cache_limit = ulongval;
+
+ if (!repo_config_get_ulong(r, "core.packedgitwindowsize", &ulongval)) {
+ int pgsz_x2 = getpagesize() * 2;
+
+ /* This value must be multiple of (pagesize * 2) */
+ ulongval /= pgsz_x2;
+ if (ulongval < 1)
+ ulongval = 1;
+ r->settings.packed_git_window_size = ulongval * pgsz_x2;
+ }
+
+ if (!repo_config_get_ulong(r, "core.packedgitlimit", &ulongval))
+ r->settings.packed_git_limit = ulongval;
}
enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo)
diff --git a/repo-settings.h b/repo-settings.h
index 10a6f7ed64..93ea0c3274 100644
--- a/repo-settings.h
+++ b/repo-settings.h
@@ -59,6 +59,8 @@ struct repo_settings {
int warn_ambiguous_refs; /* lazily loaded via accessor */
size_t delta_base_cache_limit;
+ size_t packed_git_window_size;
+ size_t packed_git_limit;
};
#define REPO_SETTINGS_INIT { \
.index_version = -1, \
@@ -66,6 +68,8 @@ struct repo_settings {
.fetch_negotiation_algorithm = FETCH_NEGOTIATION_CONSECUTIVE, \
.warn_ambiguous_refs = -1, \
.delta_base_cache_limit = DEFAULT_DELTA_BASE_CACHE_LIMIT, \
+ .packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE, \
+ .packed_git_limit = DEFAULT_PACKED_GIT_LIMIT, \
}
void prepare_repo_settings(struct repository *r);
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 09/10] midx: add repository to `multi_pack_index` struct
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (7 preceding siblings ...)
2024-12-03 14:44 ` [PATCH v10 08/10] config: make `packed_git_(limit|window_size)` non-global variables Karthik Nayak
@ 2024-12-03 14:44 ` Karthik Nayak
2024-12-03 14:44 ` [PATCH v10 10/10] packfile.c: remove unnecessary prepare_packed_git() call Karthik Nayak
2024-12-03 16:46 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Kristoffer Haugsbakk
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:44 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster
The `multi_pack_index` struct represents the MIDX for a repository.
Here, we add a pointer to the repository in this struct, allowing direct
use of the repository variable without relying on the global
`the_repository` struct.
With this addition, we can determine the repository associated with a
`bitmap_index` struct. A `bitmap_index` points to either a `packed_git`
or a `multi_pack_index`, both of which have direct repository
references. To support this, we introduce a static helper function,
`bitmap_repo`, in `pack-bitmap.c`, which retrieves a repository given a
`bitmap_index`.
With this, we clear up all usages of `the_repository` within
`pack-bitmap.c` and also remove the `USE_THE_REPOSITORY_VARIABLE`
definition, bringing us another step closer to removing all global
variable usage.
Although this change also opens up the potential to clean up `midx.c`,
doing so would require additional refactoring to pass the repository
struct to functions where the MIDX struct is created: a task better
suited for future patches.
Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
midx.c | 1 +
midx.h | 3 ++
pack-bitmap.c | 90 +++++++++++++++++++++++++++++++--------------------
3 files changed, 59 insertions(+), 35 deletions(-)
diff --git a/midx.c b/midx.c
index 8edb75f51d..079c45a1aa 100644
--- a/midx.c
+++ b/midx.c
@@ -131,6 +131,7 @@ static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir
m->data = midx_map;
m->data_len = midx_size;
m->local = local;
+ m->repo = the_repository;
m->signature = get_be32(m->data);
if (m->signature != MIDX_SIGNATURE)
diff --git a/midx.h b/midx.h
index 42d4f8d149..3b0ac4d878 100644
--- a/midx.h
+++ b/midx.h
@@ -71,6 +71,9 @@ struct multi_pack_index {
const char **pack_names;
struct packed_git **packs;
+
+ struct repository *repo;
+
char object_dir[FLEX_ARRAY];
};
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d34ba9909a..0cb1b56c9d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,5 +1,3 @@
-#define USE_THE_REPOSITORY_VARIABLE
-
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -177,12 +175,21 @@ static uint32_t bitmap_num_objects(struct bitmap_index *index)
return index->pack->num_objects;
}
+static struct repository *bitmap_repo(struct bitmap_index *bitmap_git)
+{
+ if (bitmap_is_midx(bitmap_git))
+ return bitmap_git->midx->repo;
+ return bitmap_git->pack->repo;
+}
+
static int load_bitmap_header(struct bitmap_index *index)
{
struct bitmap_disk_header *header = (void *)index->map;
- size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz;
+ const struct git_hash_algo *hash_algo = bitmap_repo(index)->hash_algo;
+
+ size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + hash_algo->rawsz;
- if (index->map_size < header_size + the_hash_algo->rawsz)
+ if (index->map_size < header_size + hash_algo->rawsz)
return error(_("corrupted bitmap index (too small)"));
if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
@@ -196,7 +203,7 @@ static int load_bitmap_header(struct bitmap_index *index)
{
uint32_t flags = ntohs(header->options);
size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t));
- unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz;
+ unsigned char *index_end = index->map + index->map_size - hash_algo->rawsz;
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
BUG("unsupported options for bitmap index file "
@@ -409,7 +416,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (bitmap_git->pack || bitmap_git->midx) {
struct strbuf buf = STRBUF_INIT;
get_midx_filename(&buf, midx->object_dir);
- trace2_data_string("bitmap", the_repository,
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
"ignoring extra midx bitmap file", buf.buf);
close(fd);
strbuf_release(&buf);
@@ -427,7 +434,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
goto cleanup;
if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
- the_repository->hash_algo)) {
+ bitmap_repo(bitmap_git)->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -438,7 +445,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
- if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) {
+ if (prepare_midx_pack(bitmap_repo(bitmap_git),
+ bitmap_git->midx,
+ i)) {
warning(_("could not open pack %s"),
bitmap_git->midx->pack_names[i]);
goto cleanup;
@@ -492,8 +501,9 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
}
if (bitmap_git->pack || bitmap_git->midx) {
- trace2_data_string("bitmap", the_repository,
- "ignoring extra bitmap file", packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "ignoring extra bitmap file",
+ packfile->pack_name);
close(fd);
return -1;
}
@@ -518,8 +528,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
return -1;
}
- trace2_data_string("bitmap", the_repository, "opened bitmap file",
- packfile->pack_name);
+ trace2_data_string("bitmap", bitmap_repo(bitmap_git),
+ "opened bitmap file", packfile->pack_name);
return 0;
}
@@ -649,7 +659,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
{
- struct repository *r = the_repository;
+ struct repository *r = midx->repo;
struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
@@ -1213,6 +1223,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
{
struct bitmap_boundary_cb cb;
struct object_list *root;
+ struct repository *repo;
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
@@ -1222,6 +1233,8 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
cb.base = bitmap_new();
object_array_init(&cb.boundary);
+ repo = bitmap_repo(bitmap_git);
+
revs->ignore_missing_links = 1;
if (bitmap_git->pseudo_merges.nr) {
@@ -1280,19 +1293,19 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
* between the tips and boundary, and (b) record the boundary.
*/
- trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-prepare", repo);
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-prepare", repo);
- trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-traverse", repo);
revs->boundary = 1;
traverse_commit_list_filtered(revs,
show_boundary_commit,
show_boundary_object,
&cb, NULL);
revs->boundary = 0;
- trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-traverse", repo);
revs->blob_objects = tmp_blobs;
revs->tree_objects = tmp_trees;
@@ -1304,7 +1317,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
/*
* Then add the boundary commit(s) as fill-in traversal tips.
*/
- trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_enter("pack-bitmap", "boundary-fill-in", repo);
for (i = 0; i < cb.boundary.nr; i++) {
struct object *obj = cb.boundary.objects[i].item;
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
@@ -1314,7 +1327,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
}
if (revs->pending.nr)
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
- trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
+ trace2_region_leave("pack-bitmap", "boundary-fill-in", repo);
cleanup:
object_array_clear(&cb.boundary);
@@ -1718,7 +1731,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
ofs = pack_pos_to_offset(pack, pos);
}
- if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
+ if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs,
+ &oi) < 0) {
struct object_id oid;
nth_bitmap_object_oid(bitmap_git, &oid,
pack_pos_to_index(pack, pos));
@@ -1727,7 +1741,8 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
} else {
struct eindex *eindex = &bitmap_git->ext_index;
struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)];
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
}
@@ -1889,7 +1904,8 @@ static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git,
bitmap_unset(result, i);
for (i = 0; i < eindex->count; ++i) {
- if (has_object_pack(the_repository, &eindex->objects[i]->oid))
+ if (has_object_pack(bitmap_repo(bitmap_git),
+ &eindex->objects[i]->oid))
bitmap_unset(result, objects_nr + i);
}
}
@@ -1907,6 +1923,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
struct bitmap *haves_bitmap = NULL;
struct bitmap_index *bitmap_git;
+ struct repository *repo;
/*
* We can't do pathspec limiting with bitmaps, because we don't know
@@ -1980,18 +1997,20 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
if (!use_boundary_traversal)
object_array_clear(&revs->pending);
+ repo = bitmap_repo(bitmap_git);
+
if (haves) {
if (use_boundary_traversal) {
- trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/boundary", repo);
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
- trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/boundary", repo);
} else {
- trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_enter("pack-bitmap", "haves/classic", repo);
revs->ignore_missing_links = 1;
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
reset_revision_walk();
revs->ignore_missing_links = 0;
- trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
+ trace2_region_leave("pack-bitmap", "haves/classic", repo);
}
if (!haves_bitmap)
@@ -2025,17 +2044,17 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_satisfied",
pseudo_merges_satisfied_nr);
- trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ trace2_data_intmax("bitmap", repo, "pseudo_merges_cascades",
pseudo_merges_cascades_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ trace2_data_intmax("bitmap", repo, "bitmap/hits",
existing_bitmaps_hits_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ trace2_data_intmax("bitmap", repo, "bitmap/misses",
existing_bitmaps_misses_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_with_bitmap",
roots_with_bitmaps_nr);
- trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ trace2_data_intmax("bitmap", repo, "bitmap/roots_without_bitmap",
roots_without_bitmaps_nr);
return bitmap_git;
@@ -2256,7 +2275,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmap **reuse_out,
int multi_pack_reuse)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
struct bitmapped_pack *packs = NULL;
struct bitmap *result = bitmap_git->result;
struct bitmap *reuse;
@@ -2792,7 +2811,7 @@ int rebuild_bitmap(const uint32_t *reposition,
uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
struct packing_data *mapping)
{
- struct repository *r = the_repository;
+ struct repository *r = bitmap_repo(bitmap_git);
uint32_t i, num_objects;
uint32_t *reposition;
@@ -2948,7 +2967,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git)
st_add(bitmap_num_objects(bitmap_git), i)))
continue;
- if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
+ if (oid_object_info_extended(bitmap_repo(bitmap_git), &obj->oid,
+ &oi, 0) < 0)
die(_("unable to get disk usage of '%s'"),
oid_to_hex(&obj->oid));
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* [PATCH v10 10/10] packfile.c: remove unnecessary prepare_packed_git() call
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (8 preceding siblings ...)
2024-12-03 14:44 ` [PATCH v10 09/10] midx: add repository to `multi_pack_index` struct Karthik Nayak
@ 2024-12-03 14:44 ` Karthik Nayak
2024-12-03 16:46 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Kristoffer Haugsbakk
10 siblings, 0 replies; 184+ messages in thread
From: Karthik Nayak @ 2024-12-03 14:44 UTC (permalink / raw)
To: karthik.188; +Cc: git, gitster, Taylor Blau
From: Taylor Blau <me@ttaylorr.com>
In 454ea2e4d7 (treewide: use get_all_packs, 2018-08-20) we converted
existing calls to both:
- get_packed_git(), as well as
- the_repository->objects->packed_git
, to instead use the new get_all_packs() function.
In the instance that this commit addresses, there was a preceding call
to prepare_packed_git(), which dates all the way back to 660c889e46
(sha1_file: add for_each iterators for loose and packed objects,
2014-10-15) when its caller (for_each_packed_object()) was first
introduced.
This call could have been removed in 454ea2e4d7, since get_all_packs()
itself calls prepare_packed_git(). But the translation in 454ea2e4d7 was
(to the best of my knowledge) a find-and-replace rather than inspecting
each individual caller.
Having an extra prepare_packed_git() call here is harmless, since it
will notice that we have already set the 'packed_git_initialized' field
and the call will be a noop. So we're only talking about a few dozen CPU
cycles to set up and tear down the stack frame.
But having a lone prepare_packed_git() call immediately before a call to
get_all_packs() confused me, so let's remove it as redundant to avoid
more confusion in the future.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
packfile.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/packfile.c b/packfile.c
index 2e0e28c7de..9c4bd81a8c 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2220,7 +2220,6 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
int r = 0;
int pack_errors = 0;
- prepare_packed_git(repo);
for (p = get_all_packs(repo); p; p = p->next) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
--
2.47.1
^ permalink raw reply related [flat|nested] 184+ messages in thread
* Re: [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable
2024-12-03 14:43 ` [PATCH v10 00/10] packfile: avoid using the 'the_repository' global variable Karthik Nayak
` (9 preceding siblings ...)
2024-12-03 14:44 ` [PATCH v10 10/10] packfile.c: remove unnecessary prepare_packed_git() call Karthik Nayak
@ 2024-12-03 16:46 ` Kristoffer Haugsbakk
2024-12-03 23:24 ` Junio C Hamano
10 siblings, 1 reply; 184+ messages in thread
From: Kristoffer Haugsbakk @ 2024-12-03 16:46 UTC (permalink / raw)
To: Karthik Nayak; +Cc: git, Junio C Hamano
On Tue, Dec 3, 2024, at 15:43, Karthik Nayak wrote:
> Range-diff against v9:
> 1: d1fdd6996a ! 1: d6d571c58e packfile: add repository to struct
> `packed_git`
> @@ Commit message
> on the global `the_repository` object in `packfile.c` by
> simply using
> repository information now readily available in the struct.
>
> - We do need to consider that a pack file could be part of the
> alternates
> + We do need to consider that a packfile could be part of the
> alternates
> of a repository, but considering that we only have one
> repository struct
> - and also that we currently anyways use 'the_repository'. We
> should be
> + and also that we currently anyways use 'the_repository', we
> should be
> OK with this change.
>
> We also modify `alloc_packed_git` to ensure that the
> repository is added
> 2: 65c09858ce = 2: fa69763468 packfile: use `repository` from
> `packed_git` directly
> 3: 80632934d1 ! 3: c6acbece46 packfile: pass `repository` to static
> function in the file
> @@ Commit message
> packfile: pass `repository` to static function in the file
>
> Some of the static functions in the `packfile.c` access global
> - variables, which can simply be avoiding by passing the `repository`
> + variables, which can simply be avoided by passing the `repository`
> struct down to them. Let's do that.
Nice, thank you.
^ permalink raw reply [flat|nested] 184+ messages in thread