* [PATCH 01/10] reftable/blocksource: drop malloc block source
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
@ 2024-08-19 15:39 ` Patrick Steinhardt
2024-08-19 15:39 ` [PATCH 02/10] reftable/stack: inline `stack_compact_range_stats()` Patrick Steinhardt
` (11 subsequent siblings)
12 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:39 UTC (permalink / raw)
To: git; +Cc: Jeff King
The reftable blocksource provides a generic interface to read blocks via
different sources, e.g. from disk or from memory. One of the block
sources is the malloc block source, which can in theory read data from
memory. We nowadays also have a strbuf block source though, which
provides essentially the same functionality with better ergonomics.
Adapt the only remaining user of the malloc block source in our tests
to use the strbuf block source, instead, and remove the now-unused
malloc block source.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block_test.c | 3 ++-
reftable/blocksource.c | 20 --------------------
reftable/blocksource.h | 2 --
3 files changed, 2 insertions(+), 23 deletions(-)
diff --git a/reftable/block_test.c b/reftable/block_test.c
index 90aecd5a7c..de8f426a42 100644
--- a/reftable/block_test.c
+++ b/reftable/block_test.c
@@ -34,11 +34,12 @@ static void test_block_read_write(void)
struct block_reader br = { 0 };
struct block_iter it = BLOCK_ITER_INIT;
int j = 0;
+ struct strbuf block_data = STRBUF_INIT;
struct strbuf want = STRBUF_INIT;
REFTABLE_CALLOC_ARRAY(block.data, block_size);
block.len = block_size;
- block.source = malloc_block_source();
+ block_source_from_strbuf(&block.source, &block_data);
block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size,
header_off, hash_size(GIT_SHA1_FORMAT_ID));
diff --git a/reftable/blocksource.c b/reftable/blocksource.c
index eeed254ba9..1774853011 100644
--- a/reftable/blocksource.c
+++ b/reftable/blocksource.c
@@ -55,26 +55,6 @@ void block_source_from_strbuf(struct reftable_block_source *bs,
bs->arg = buf;
}
-static void malloc_return_block(void *b, struct reftable_block *dest)
-{
- if (dest->len)
- memset(dest->data, 0xff, dest->len);
- reftable_free(dest->data);
-}
-
-static struct reftable_block_source_vtable malloc_vtable = {
- .return_block = &malloc_return_block,
-};
-
-static struct reftable_block_source malloc_block_source_instance = {
- .ops = &malloc_vtable,
-};
-
-struct reftable_block_source malloc_block_source(void)
-{
- return malloc_block_source_instance;
-}
-
struct file_block_source {
uint64_t size;
unsigned char *data;
diff --git a/reftable/blocksource.h b/reftable/blocksource.h
index 072e2727ad..659a27b406 100644
--- a/reftable/blocksource.h
+++ b/reftable/blocksource.h
@@ -17,6 +17,4 @@ struct reftable_block_source;
void block_source_from_strbuf(struct reftable_block_source *bs,
struct strbuf *buf);
-struct reftable_block_source malloc_block_source(void);
-
#endif
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 02/10] reftable/stack: inline `stack_compact_range_stats()`
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
2024-08-19 15:39 ` [PATCH 01/10] reftable/blocksource: drop malloc block source Patrick Steinhardt
@ 2024-08-19 15:39 ` Patrick Steinhardt
2024-08-22 8:23 ` karthik nayak
2024-08-19 15:39 ` [PATCH 03/10] reftable/reader: rename `reftable_new_reader()` Patrick Steinhardt
` (10 subsequent siblings)
12 siblings, 1 reply; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:39 UTC (permalink / raw)
To: git; +Cc: Jeff King
The only difference between `stack_compact_range_stats()` and
`stack_compact_range()` is that the former updates stats on failure,
whereas the latter doesn't. There are no callers anymore that do not
want their stats updated though, making the indirection unnecessary.
Inline the stat updates into `stack_compact_range()`.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/reftable/stack.c b/reftable/stack.c
index d3a95d2f1d..891ea971b7 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1328,17 +1328,9 @@ static int stack_compact_range(struct reftable_stack *st,
strbuf_release(&table_name);
free_names(names);
- return err;
-}
-
-static int stack_compact_range_stats(struct reftable_stack *st,
- size_t first, size_t last,
- struct reftable_log_expiry_config *config,
- unsigned int flags)
-{
- int err = stack_compact_range(st, first, last, config, flags);
if (err == REFTABLE_LOCK_ERROR)
st->stats.failures++;
+
return err;
}
@@ -1346,7 +1338,7 @@ int reftable_stack_compact_all(struct reftable_stack *st,
struct reftable_log_expiry_config *config)
{
size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0;
- return stack_compact_range_stats(st, 0, last, config, 0);
+ return stack_compact_range(st, 0, last, config, 0);
}
static int segment_size(struct segment *s)
@@ -1452,8 +1444,8 @@ int reftable_stack_auto_compact(struct reftable_stack *st)
st->opts.auto_compaction_factor);
reftable_free(sizes);
if (segment_size(&seg) > 0)
- return stack_compact_range_stats(st, seg.start, seg.end - 1,
- NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
+ return stack_compact_range(st, seg.start, seg.end - 1,
+ NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
return 0;
}
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 03/10] reftable/reader: rename `reftable_new_reader()`
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
2024-08-19 15:39 ` [PATCH 01/10] reftable/blocksource: drop malloc block source Patrick Steinhardt
2024-08-19 15:39 ` [PATCH 02/10] reftable/stack: inline `stack_compact_range_stats()` Patrick Steinhardt
@ 2024-08-19 15:39 ` Patrick Steinhardt
2024-08-22 18:51 ` Justin Tobler
2024-08-19 15:39 ` [PATCH 04/10] reftable/reader: inline `init_reader()` Patrick Steinhardt
` (9 subsequent siblings)
12 siblings, 1 reply; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:39 UTC (permalink / raw)
To: git; +Cc: Jeff King
Rename the `reftable_new_reader()` function to `reftable_reader_new()`
to match our coding guidelines.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 4 ++--
reftable/reader.h | 2 +-
reftable/readwrite_test.c | 6 +++---
reftable/reftable-reader.h | 4 ++--
reftable/stack.c | 4 ++--
t/helper/test-reftable.c | 2 +-
t/unit-tests/t-reftable-merged.c | 6 +++---
7 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index 082cf00b60..ea4fdea90b 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -637,7 +637,7 @@ void reader_close(struct reftable_reader *r)
FREE_AND_NULL(r->name);
}
-int reftable_new_reader(struct reftable_reader **p,
+int reftable_reader_new(struct reftable_reader **p,
struct reftable_block_source *src, char const *name)
{
struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd));
@@ -786,7 +786,7 @@ int reftable_reader_print_blocks(const char *tablename)
if (err < 0)
goto done;
- err = reftable_new_reader(&r, &src, tablename);
+ err = reftable_reader_new(&r, &src, tablename);
if (err < 0)
goto done;
diff --git a/reftable/reader.h b/reftable/reader.h
index a2c204d523..d8cbfd6404 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -52,7 +52,7 @@ struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
-int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
+int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
const char *name);
void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index f411abfe9c..101cdb5cd6 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -646,7 +646,7 @@ static void test_write_empty_table(void)
block_source_from_strbuf(&source, &buf);
- err = reftable_new_reader(&rd, &source, "filename");
+ err = reftable_reader_new(&rd, &source, "filename");
EXPECT_ERR(err);
reftable_reader_init_ref_iterator(rd, &it);
@@ -850,7 +850,7 @@ static void test_write_multiple_indices(void)
EXPECT(stats->log_stats.index_offset > 0);
block_source_from_strbuf(&source, &writer_buf);
- err = reftable_new_reader(&reader, &source, "filename");
+ err = reftable_reader_new(&reader, &source, "filename");
EXPECT_ERR(err);
/*
@@ -907,7 +907,7 @@ static void test_write_multi_level_index(void)
EXPECT(stats->ref_stats.max_index_level == 2);
block_source_from_strbuf(&source, &writer_buf);
- err = reftable_new_reader(&reader, &source, "filename");
+ err = reftable_reader_new(&reader, &source, "filename");
EXPECT_ERR(err);
/*
diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h
index 69621c5b0f..8a05c84685 100644
--- a/reftable/reftable-reader.h
+++ b/reftable/reftable-reader.h
@@ -23,14 +23,14 @@
/* The reader struct is a handle to an open reftable file. */
struct reftable_reader;
-/* reftable_new_reader opens a reftable for reading. If successful,
+/* reftable_reader_new opens a reftable for reading. If successful,
* returns 0 code and sets pp. The name is used for creating a
* stack. Typically, it is the basename of the file. The block source
* `src` is owned by the reader, and is closed on calling
* reftable_reader_destroy(). On error, the block source `src` is
* closed as well.
*/
-int reftable_new_reader(struct reftable_reader **pp,
+int reftable_reader_new(struct reftable_reader **pp,
struct reftable_block_source *src, const char *name);
/* Initialize a reftable iterator for reading refs. */
diff --git a/reftable/stack.c b/reftable/stack.c
index 891ea971b7..c72435b059 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -258,7 +258,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
if (err < 0)
goto done;
- err = reftable_new_reader(&rd, &src, name);
+ err = reftable_reader_new(&rd, &src, name);
if (err < 0)
goto done;
}
@@ -1532,7 +1532,7 @@ static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
if (err < 0)
goto done;
- err = reftable_new_reader(&rd, &src, name);
+ err = reftable_reader_new(&rd, &src, name);
if (err < 0)
goto done;
diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c
index c1942156b5..87c2f42aae 100644
--- a/t/helper/test-reftable.c
+++ b/t/helper/test-reftable.c
@@ -139,7 +139,7 @@ static int dump_reftable(const char *tablename)
if (err < 0)
goto done;
- err = reftable_new_reader(&r, &src, tablename);
+ err = reftable_reader_new(&r, &src, tablename);
if (err < 0)
goto done;
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 93345c6c8b..8f51aca1b6 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -102,7 +102,7 @@ merged_table_from_records(struct reftable_ref_record **refs,
write_test_table(&buf[i], refs[i], sizes[i]);
block_source_from_strbuf(&(*source)[i], &buf[i]);
- err = reftable_new_reader(&(*readers)[i], &(*source)[i],
+ err = reftable_reader_new(&(*readers)[i], &(*source)[i],
"name");
check(!err);
}
@@ -277,7 +277,7 @@ merged_table_from_log_records(struct reftable_log_record **logs,
write_test_log_table(&buf[i], logs[i], sizes[i], i + 1);
block_source_from_strbuf(&(*source)[i], &buf[i]);
- err = reftable_new_reader(&(*readers)[i], &(*source)[i],
+ err = reftable_reader_new(&(*readers)[i], &(*source)[i],
"name");
check(!err);
}
@@ -426,7 +426,7 @@ static void t_default_write_opts(void)
block_source_from_strbuf(&source, &buf);
- err = reftable_new_reader(&rd, &source, "filename");
+ err = reftable_reader_new(&rd, &source, "filename");
check(!err);
hash_id = reftable_reader_hash_id(rd);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH 03/10] reftable/reader: rename `reftable_new_reader()`
2024-08-19 15:39 ` [PATCH 03/10] reftable/reader: rename `reftable_new_reader()` Patrick Steinhardt
@ 2024-08-22 18:51 ` Justin Tobler
2024-08-22 19:09 ` Junio C Hamano
0 siblings, 1 reply; 38+ messages in thread
From: Justin Tobler @ 2024-08-22 18:51 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Jeff King
On 24/08/19 05:39PM, Patrick Steinhardt wrote:
> Rename the `reftable_new_reader()` function to `reftable_reader_new()`
> to match our coding guidelines.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
[snip]
> -int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
> +int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
> const char *name);
Here we also rename `init_reader()` to `reader_init()`, but do not
update its call sites resulting in build errors. Since we remove this in
the next patch anyway, let's drop this change.
-Justin
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 03/10] reftable/reader: rename `reftable_new_reader()`
2024-08-22 18:51 ` Justin Tobler
@ 2024-08-22 19:09 ` Junio C Hamano
2024-08-23 5:22 ` Patrick Steinhardt
0 siblings, 1 reply; 38+ messages in thread
From: Junio C Hamano @ 2024-08-22 19:09 UTC (permalink / raw)
To: Justin Tobler; +Cc: Patrick Steinhardt, git, Jeff King
Justin Tobler <jltobler@gmail.com> writes:
> On 24/08/19 05:39PM, Patrick Steinhardt wrote:
>> Rename the `reftable_new_reader()` function to `reftable_reader_new()`
>> to match our coding guidelines.
>>
>> Signed-off-by: Patrick Steinhardt <ps@pks.im>
>> ---
> [snip]
>> -int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
>> +int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
>> const char *name);
>
> Here we also rename `init_reader()` to `reader_init()`, but do not
> update its call sites resulting in build errors. Since we remove this in
> the next patch anyway, let's drop this change.
True. The actual definition of the function is also left
unmodified. Let me locally edit the hunk out. As you pointed out,
the next step does mention the "new" name in order to remove it, so
there needs a cascading adjustment there, but the fallout is fairly
small (see the attached range-diff).
Thanks.
1: 348438a69e = 1: 348438a69e reftable/blocksource: drop malloc block source
2: ed8bb0e345 = 2: ed8bb0e345 reftable/stack: inline `stack_compact_range_stats()`
3: 80cf24d54f ! 3: 6b5466771c reftable/reader: rename `reftable_new_reader()`
@@ reftable/reader.c: int reftable_reader_print_blocks(const char *tablename)
goto done;
- ## reftable/reader.h ##
-@@ reftable/reader.h: struct reftable_reader {
- struct reftable_reader_offsets log_offsets;
- };
-
--int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
-+int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name);
- void reader_close(struct reftable_reader *r);
- const char *reader_name(struct reftable_reader *r);
-
## reftable/readwrite_test.c ##
@@ reftable/readwrite_test.c: static void test_write_empty_table(void)
4: ce93c128f5 ! 4: bc1eeb32ef reftable/reader: inline `init_reader()`
@@ reftable/reader.h: struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
--int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
+-int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name);
void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
5: 079a3d365e = 5: 3e244b036a reftable/reader: inline `reader_close()`
6: 7e951256c6 = 6: 41c537e44b reftable/stack: fix broken refnames in `write_n_ref_tables()`
7: 8e99330cff = 7: b7596c4c32 reftable/reader: introduce refcounting
8: 2383c196a6 = 8: 9f28628ecc reftable/reader: keep readers alive during iteration
9: 8af7c4485f = 9: 51df76889c reftable/stack: reorder swapping in the reloaded stack contents
10: 1ccdac05ab = 10: 32bbcc46b9 reftable/stack: fix segfault when reload with reused readers fails
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 03/10] reftable/reader: rename `reftable_new_reader()`
2024-08-22 19:09 ` Junio C Hamano
@ 2024-08-23 5:22 ` Patrick Steinhardt
0 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 5:22 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Justin Tobler, git, Jeff King
On Thu, Aug 22, 2024 at 12:09:33PM -0700, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:
>
> > On 24/08/19 05:39PM, Patrick Steinhardt wrote:
> >> Rename the `reftable_new_reader()` function to `reftable_reader_new()`
> >> to match our coding guidelines.
> >>
> >> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> >> ---
> > [snip]
> >> -int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
> >> +int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
> >> const char *name);
> >
> > Here we also rename `init_reader()` to `reader_init()`, but do not
> > update its call sites resulting in build errors. Since we remove this in
> > the next patch anyway, let's drop this change.
Well spotted! I originally wanted to fix that function's name, too. But
then I realized it's not even needed in the first place.
> True. The actual definition of the function is also left
> unmodified. Let me locally edit the hunk out. As you pointed out,
> the next step does mention the "new" name in order to remove it, so
> there needs a cascading adjustment there, but the fallout is fairly
> small (see the attached range-diff).
Thanks. The range diff looks as expected to me.
Patrick
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH 04/10] reftable/reader: inline `init_reader()`
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (2 preceding siblings ...)
2024-08-19 15:39 ` [PATCH 03/10] reftable/reader: rename `reftable_new_reader()` Patrick Steinhardt
@ 2024-08-19 15:39 ` Patrick Steinhardt
2024-08-19 15:39 ` [PATCH 05/10] reftable/reader: inline `reader_close()` Patrick Steinhardt
` (8 subsequent siblings)
12 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:39 UTC (permalink / raw)
To: git; +Cc: Jeff King
Most users use an allocated version of the `reftable_reader`, except for
some tests. We are about to convert the reader to become refcounted
though, and providing the ability to keep a reader on the stack makes
this conversion harder than necessary.
Update the tests to use `reftable_reader_new()` instead to prepare for
this change.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 122 +++++++++++++++++++-------------------
reftable/reader.h | 2 -
reftable/readwrite_test.c | 73 ++++++++++++-----------
3 files changed, 98 insertions(+), 99 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index ea4fdea90b..9239679ad9 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -162,58 +162,6 @@ static int parse_footer(struct reftable_reader *r, uint8_t *footer,
return err;
}
-int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name)
-{
- struct reftable_block footer = { NULL };
- struct reftable_block header = { NULL };
- int err = 0;
- uint64_t file_size = block_source_size(source);
-
- /* Need +1 to read type of first block. */
- uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
- memset(r, 0, sizeof(struct reftable_reader));
-
- if (read_size > file_size) {
- err = REFTABLE_FORMAT_ERROR;
- goto done;
- }
-
- err = block_source_read_block(source, &header, 0, read_size);
- if (err != read_size) {
- err = REFTABLE_IO_ERROR;
- goto done;
- }
-
- if (memcmp(header.data, "REFT", 4)) {
- err = REFTABLE_FORMAT_ERROR;
- goto done;
- }
- r->version = header.data[4];
- if (r->version != 1 && r->version != 2) {
- err = REFTABLE_FORMAT_ERROR;
- goto done;
- }
-
- r->size = file_size - footer_size(r->version);
- r->source = *source;
- r->name = xstrdup(name);
- r->hash_id = 0;
-
- err = block_source_read_block(source, &footer, r->size,
- footer_size(r->version));
- if (err != footer_size(r->version)) {
- err = REFTABLE_IO_ERROR;
- goto done;
- }
-
- err = parse_footer(r, footer.data, header.data);
-done:
- reftable_block_done(&footer);
- reftable_block_done(&header);
- return err;
-}
-
struct table_iter {
struct reftable_reader *r;
uint8_t typ;
@@ -637,16 +585,68 @@ void reader_close(struct reftable_reader *r)
FREE_AND_NULL(r->name);
}
-int reftable_reader_new(struct reftable_reader **p,
- struct reftable_block_source *src, char const *name)
+int reftable_reader_new(struct reftable_reader **out,
+ struct reftable_block_source *source, char const *name)
{
- struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd));
- int err = init_reader(rd, src, name);
- if (err == 0) {
- *p = rd;
- } else {
- block_source_close(src);
- reftable_free(rd);
+ struct reftable_block footer = { 0 };
+ struct reftable_block header = { 0 };
+ struct reftable_reader *r;
+ uint64_t file_size = block_source_size(source);
+ uint32_t read_size;
+ int err;
+
+ REFTABLE_CALLOC_ARRAY(r, 1);
+
+ /*
+ * We need one extra byte to read the type of first block. We also
+ * pretend to always be reading v2 of the format because it is larger.
+ */
+ read_size = header_size(2) + 1;
+ if (read_size > file_size) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ err = block_source_read_block(source, &header, 0, read_size);
+ if (err != read_size) {
+ err = REFTABLE_IO_ERROR;
+ goto done;
+ }
+
+ if (memcmp(header.data, "REFT", 4)) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+ r->version = header.data[4];
+ if (r->version != 1 && r->version != 2) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ r->size = file_size - footer_size(r->version);
+ r->source = *source;
+ r->name = xstrdup(name);
+ r->hash_id = 0;
+
+ err = block_source_read_block(source, &footer, r->size,
+ footer_size(r->version));
+ if (err != footer_size(r->version)) {
+ err = REFTABLE_IO_ERROR;
+ goto done;
+ }
+
+ err = parse_footer(r, footer.data, header.data);
+ if (err)
+ goto done;
+
+ *out = r;
+
+done:
+ reftable_block_done(&footer);
+ reftable_block_done(&header);
+ if (err) {
+ reftable_free(r);
+ block_source_close(source);
}
return err;
}
diff --git a/reftable/reader.h b/reftable/reader.h
index d8cbfd6404..762cd6de66 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -52,8 +52,6 @@ struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
-int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name);
void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 101cdb5cd6..2f2ff787b2 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -195,7 +195,7 @@ static void test_log_write_read(void)
struct reftable_log_record log = { NULL };
int n;
struct reftable_iterator it = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
struct strbuf buf = STRBUF_INIT;
struct reftable_writer *w =
@@ -236,10 +236,10 @@ static void test_log_write_read(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT_ERR(err);
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, names[N - 1]);
EXPECT_ERR(err);
@@ -254,7 +254,7 @@ static void test_log_write_read(void)
reftable_iterator_destroy(&it);
reftable_ref_record_release(&ref);
- reftable_reader_init_log_iterator(&rd, &it);
+ reftable_reader_init_log_iterator(reader, &it);
err = reftable_iterator_seek_log(&it, "");
EXPECT_ERR(err);
@@ -279,7 +279,7 @@ static void test_log_write_read(void)
/* cleanup. */
strbuf_release(&buf);
free_names(names);
- reader_close(&rd);
+ reftable_reader_free(reader);
}
static void test_log_zlib_corruption(void)
@@ -288,7 +288,7 @@ static void test_log_zlib_corruption(void)
.block_size = 256,
};
struct reftable_iterator it = { 0 };
- struct reftable_reader rd = { 0 };
+ struct reftable_reader *reader;
struct reftable_block_source source = { 0 };
struct strbuf buf = STRBUF_INIT;
struct reftable_writer *w =
@@ -331,18 +331,18 @@ static void test_log_zlib_corruption(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT_ERR(err);
- reftable_reader_init_log_iterator(&rd, &it);
+ reftable_reader_init_log_iterator(reader, &it);
err = reftable_iterator_seek_log(&it, "refname");
EXPECT(err == REFTABLE_ZLIB_ERROR);
reftable_iterator_destroy(&it);
/* cleanup. */
+ reftable_reader_free(reader);
strbuf_release(&buf);
- reader_close(&rd);
}
static void test_table_read_write_sequential(void)
@@ -352,7 +352,7 @@ static void test_table_read_write_sequential(void)
int N = 50;
struct reftable_iterator it = { NULL };
struct reftable_block_source source = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
int err = 0;
int j = 0;
@@ -360,10 +360,10 @@ static void test_table_read_write_sequential(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, "");
EXPECT_ERR(err);
@@ -381,11 +381,11 @@ static void test_table_read_write_sequential(void)
reftable_ref_record_release(&ref);
}
EXPECT(j == N);
+
reftable_iterator_destroy(&it);
+ reftable_reader_free(reader);
strbuf_release(&buf);
free_names(names);
-
- reader_close(&rd);
}
static void test_table_write_small_table(void)
@@ -404,7 +404,7 @@ static void test_table_read_api(void)
char **names;
struct strbuf buf = STRBUF_INIT;
int N = 50;
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
int err;
int i;
@@ -415,10 +415,10 @@ static void test_table_read_api(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, names[0]);
EXPECT_ERR(err);
@@ -431,7 +431,7 @@ static void test_table_read_api(void)
}
reftable_iterator_destroy(&it);
reftable_free(names);
- reader_close(&rd);
+ reftable_reader_free(reader);
strbuf_release(&buf);
}
@@ -440,7 +440,7 @@ static void test_table_read_write_seek(int index, int hash_id)
char **names;
struct strbuf buf = STRBUF_INIT;
int N = 50;
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
int err;
int i = 0;
@@ -453,18 +453,18 @@ static void test_table_read_write_seek(int index, int hash_id)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
- EXPECT(hash_id == reftable_reader_hash_id(&rd));
+ EXPECT(hash_id == reftable_reader_hash_id(reader));
if (!index) {
- rd.ref_offsets.index_offset = 0;
+ reader->ref_offsets.index_offset = 0;
} else {
- EXPECT(rd.ref_offsets.index_offset > 0);
+ EXPECT(reader->ref_offsets.index_offset > 0);
}
for (i = 1; i < N; i++) {
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, names[i]);
EXPECT_ERR(err);
err = reftable_iterator_next_ref(&it, &ref);
@@ -480,7 +480,7 @@ static void test_table_read_write_seek(int index, int hash_id)
strbuf_addstr(&pastLast, names[N - 1]);
strbuf_addstr(&pastLast, "/");
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, pastLast.buf);
if (err == 0) {
struct reftable_ref_record ref = { NULL };
@@ -498,7 +498,7 @@ static void test_table_read_write_seek(int index, int hash_id)
reftable_free(names[i]);
}
reftable_free(names);
- reader_close(&rd);
+ reftable_reader_free(reader);
}
static void test_table_read_write_seek_linear(void)
@@ -530,7 +530,7 @@ static void test_table_refs_for(int indexed)
int i = 0;
int n;
int err;
- struct reftable_reader rd;
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
struct strbuf buf = STRBUF_INIT;
@@ -579,18 +579,18 @@ static void test_table_refs_for(int indexed)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
if (!indexed) {
- rd.obj_offsets.is_present = 0;
+ reader->obj_offsets.is_present = 0;
}
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, "");
EXPECT_ERR(err);
reftable_iterator_destroy(&it);
- err = reftable_reader_refs_for(&rd, &it, want_hash);
+ err = reftable_reader_refs_for(reader, &it, want_hash);
EXPECT_ERR(err);
j = 0;
@@ -611,7 +611,7 @@ static void test_table_refs_for(int indexed)
strbuf_release(&buf);
free_names(want_names);
reftable_iterator_destroy(&it);
- reader_close(&rd);
+ reftable_reader_free(reader);
}
static void test_table_refs_for_no_index(void)
@@ -928,11 +928,11 @@ static void test_corrupt_table_empty(void)
{
struct strbuf buf = STRBUF_INIT;
struct reftable_block_source source = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
int err;
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT(err == REFTABLE_FORMAT_ERROR);
}
@@ -941,13 +941,14 @@ static void test_corrupt_table(void)
uint8_t zeros[1024] = { 0 };
struct strbuf buf = STRBUF_INIT;
struct reftable_block_source source = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
int err;
strbuf_add(&buf, zeros, sizeof(zeros));
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT(err == REFTABLE_FORMAT_ERROR);
+
strbuf_release(&buf);
}
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 05/10] reftable/reader: inline `reader_close()`
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (3 preceding siblings ...)
2024-08-19 15:39 ` [PATCH 04/10] reftable/reader: inline `init_reader()` Patrick Steinhardt
@ 2024-08-19 15:39 ` Patrick Steinhardt
2024-08-19 15:40 ` [PATCH 06/10] reftable/stack: fix broken refnames in `write_n_ref_tables()` Patrick Steinhardt
` (7 subsequent siblings)
12 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:39 UTC (permalink / raw)
To: git; +Cc: Jeff King
Same as with the preceding commit, we also provide a `reader_close()`
function that allows the caller to close a reader without freeing it.
This is unnecessary now that all users will have an allocated version of
the reader.
Inline it into `reftable_reader_free()`.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 9 ++-------
reftable/reader.h | 1 -
reftable/stack.c | 5 +----
3 files changed, 3 insertions(+), 12 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index 9239679ad9..037417fcf6 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -579,12 +579,6 @@ void reftable_reader_init_log_iterator(struct reftable_reader *r,
reader_init_iter(r, it, BLOCK_TYPE_LOG);
}
-void reader_close(struct reftable_reader *r)
-{
- block_source_close(&r->source);
- FREE_AND_NULL(r->name);
-}
-
int reftable_reader_new(struct reftable_reader **out,
struct reftable_block_source *source, char const *name)
{
@@ -655,7 +649,8 @@ void reftable_reader_free(struct reftable_reader *r)
{
if (!r)
return;
- reader_close(r);
+ block_source_close(&r->source);
+ FREE_AND_NULL(r->name);
reftable_free(r);
}
diff --git a/reftable/reader.h b/reftable/reader.h
index 762cd6de66..88b4f3b421 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -52,7 +52,6 @@ struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
-void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
void reader_init_iter(struct reftable_reader *r,
diff --git a/reftable/stack.c b/reftable/stack.c
index c72435b059..0ac9cdf8de 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -290,7 +290,6 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
const char *name = reader_name(cur[i]);
stack_filename(&table_path, st, name);
- reader_close(cur[i]);
reftable_reader_free(cur[i]);
/* On Windows, can only unlink after closing. */
@@ -299,10 +298,8 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
}
done:
- for (i = 0; i < new_readers_len; i++) {
- reader_close(new_readers[i]);
+ for (i = 0; i < new_readers_len; i++)
reftable_reader_free(new_readers[i]);
- }
reftable_free(new_readers);
reftable_free(cur);
strbuf_release(&table_path);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 06/10] reftable/stack: fix broken refnames in `write_n_ref_tables()`
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (4 preceding siblings ...)
2024-08-19 15:39 ` [PATCH 05/10] reftable/reader: inline `reader_close()` Patrick Steinhardt
@ 2024-08-19 15:40 ` Patrick Steinhardt
2024-08-19 15:40 ` [PATCH 07/10] reftable/reader: introduce refcounting Patrick Steinhardt
` (6 subsequent siblings)
12 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:40 UTC (permalink / raw)
To: git; +Cc: Jeff King
The `write_n_ref_tables()` helper function writes N references in
separate tables. We never reset the computed name of those references
though, leading us to end up with unexpected names.
Fix this by resetting the buffer.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack_test.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 42044ed8a3..de0669b7b8 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -125,6 +125,7 @@ static void write_n_ref_tables(struct reftable_stack *st,
.value_type = REFTABLE_REF_VAL1,
};
+ strbuf_reset(&buf);
strbuf_addf(&buf, "refs/heads/branch-%04u", (unsigned) i);
ref.refname = buf.buf;
set_test_hash(ref.value.val1, i);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 07/10] reftable/reader: introduce refcounting
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (5 preceding siblings ...)
2024-08-19 15:40 ` [PATCH 06/10] reftable/stack: fix broken refnames in `write_n_ref_tables()` Patrick Steinhardt
@ 2024-08-19 15:40 ` Patrick Steinhardt
2024-08-22 9:47 ` karthik nayak
2024-08-19 15:40 ` [PATCH 08/10] reftable/reader: keep readers alive during iteration Patrick Steinhardt
` (5 subsequent siblings)
12 siblings, 1 reply; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:40 UTC (permalink / raw)
To: git; +Cc: Jeff King
It was recently reported that concurrent reads and writes may cause the
reftable backend to segfault. The root cause of this is that we do not
properly keep track of reftable readers across reloads.
Suppose that you have a reftable iterator and then decide to reload the
stack while iterating through the iterator. When the stack has been
rewritten since we have created the iterator, then we would end up
discarding a subset of readers that may still be in use by the iterator.
The consequence is that we now try to reference deallocated memory,
which of course segfaults.
One way to trigger this is in t5616, where some background maintenance
jobs have been leaking from one test into another. This leads to stack
traces like the following one:
+ git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 --refetch origin
AddressSanitizer:DEADLYSIGNAL
=================================================================
==657994==ERROR: AddressSanitizer: SEGV on unknown address 0x7fa0f0ec6089 (pc 0x55f23e52ddf9 bp
0x7ffe7bfa1700 sp 0x7ffe7bfa1700 T0)
==657994==The signal is caused by a READ memory access.
#0 0x55f23e52ddf9 in get_var_int reftable/record.c:29
#1 0x55f23e53295e in reftable_decode_keylen reftable/record.c:170
#2 0x55f23e532cc0 in reftable_decode_key reftable/record.c:194
#3 0x55f23e54e72e in block_iter_next reftable/block.c:398
#4 0x55f23e5573dc in table_iter_next_in_block reftable/reader.c:240
#5 0x55f23e5573dc in table_iter_next reftable/reader.c:355
#6 0x55f23e5573dc in table_iter_next reftable/reader.c:339
#7 0x55f23e551283 in merged_iter_advance_subiter reftable/merged.c:69
#8 0x55f23e55169e in merged_iter_next_entry reftable/merged.c:123
#9 0x55f23e55169e in merged_iter_next_void reftable/merged.c:172
#10 0x55f23e537625 in reftable_iterator_next_ref reftable/generic.c:175
#11 0x55f23e2cf9c6 in reftable_ref_iterator_advance refs/reftable-backend.c:464
#12 0x55f23e2d996e in ref_iterator_advance refs/iterator.c:13
#13 0x55f23e2d996e in do_for_each_ref_iterator refs/iterator.c:452
#14 0x55f23dca6767 in get_ref_map builtin/fetch.c:623
#15 0x55f23dca6767 in do_fetch builtin/fetch.c:1659
#16 0x55f23dca6767 in fetch_one builtin/fetch.c:2133
#17 0x55f23dca6767 in cmd_fetch builtin/fetch.c:2432
#18 0x55f23dba7764 in run_builtin git.c:484
#19 0x55f23dba7764 in handle_builtin git.c:741
#20 0x55f23dbab61e in run_argv git.c:805
#21 0x55f23dbab61e in cmd_main git.c:1000
#22 0x55f23dba4781 in main common-main.c:64
#23 0x7fa0f063fc89 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
#24 0x7fa0f063fd44 in __libc_start_main_impl ../csu/libc-start.c:360
#25 0x55f23dba6ad0 in _start (git+0xadfad0) (BuildId: 803b2b7f59beb03d7849fb8294a8e2145dd4aa27)
While it is somewhat awkward that the maintenance processes survive
tests in the first place, it is totally expected that reftables should
work alright with concurrent writers. Seemingly they don't.
The only underlying resource that we need to care about in this context
is the reftable reader, which is responsible for reading a single table
from disk. These readers get discarded immediately (unless reused) when
calling `reftable_stack_reload()`, which is wrong. We can only close
them once we know that there are no iterators using them anymore.
Prepare for a fix by converting the reftable readers to be refcounted.
Reported-by: Jeff King <peff@peff.net>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 16 ++++++++++++++--
reftable/reader.h | 2 ++
reftable/readwrite_test.c | 18 +++++++++---------
reftable/reftable-reader.h | 15 ++++++++++++---
reftable/stack.c | 8 ++++----
reftable/stack_test.c | 6 ++----
t/helper/test-reftable.c | 2 +-
t/unit-tests/t-reftable-merged.c | 4 ++--
8 files changed, 46 insertions(+), 25 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index 037417fcf6..64a0953e68 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -621,6 +621,7 @@ int reftable_reader_new(struct reftable_reader **out,
r->source = *source;
r->name = xstrdup(name);
r->hash_id = 0;
+ r->refcount = 1;
err = block_source_read_block(source, &footer, r->size,
footer_size(r->version));
@@ -645,10 +646,21 @@ int reftable_reader_new(struct reftable_reader **out,
return err;
}
-void reftable_reader_free(struct reftable_reader *r)
+void reftable_reader_incref(struct reftable_reader *r)
+{
+ if (!r->refcount)
+ BUG("cannot increment ref counter of dead reader");
+ r->refcount++;
+}
+
+void reftable_reader_decref(struct reftable_reader *r)
{
if (!r)
return;
+ if (!r->refcount)
+ BUG("cannot decrement ref counter of dead reader");
+ if (--r->refcount)
+ return;
block_source_close(&r->source);
FREE_AND_NULL(r->name);
reftable_free(r);
@@ -812,7 +824,7 @@ int reftable_reader_print_blocks(const char *tablename)
}
done:
- reftable_reader_free(r);
+ reftable_reader_decref(r);
table_iter_close(&ti);
return err;
}
diff --git a/reftable/reader.h b/reftable/reader.h
index 88b4f3b421..3710ee09b4 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -50,6 +50,8 @@ struct reftable_reader {
struct reftable_reader_offsets ref_offsets;
struct reftable_reader_offsets obj_offsets;
struct reftable_reader_offsets log_offsets;
+
+ uint64_t refcount;
};
const char *reader_name(struct reftable_reader *r);
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 2f2ff787b2..0494e7955a 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -279,7 +279,7 @@ static void test_log_write_read(void)
/* cleanup. */
strbuf_release(&buf);
free_names(names);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
}
static void test_log_zlib_corruption(void)
@@ -341,7 +341,7 @@ static void test_log_zlib_corruption(void)
reftable_iterator_destroy(&it);
/* cleanup. */
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&buf);
}
@@ -383,7 +383,7 @@ static void test_table_read_write_sequential(void)
EXPECT(j == N);
reftable_iterator_destroy(&it);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&buf);
free_names(names);
}
@@ -431,7 +431,7 @@ static void test_table_read_api(void)
}
reftable_iterator_destroy(&it);
reftable_free(names);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&buf);
}
@@ -498,7 +498,7 @@ static void test_table_read_write_seek(int index, int hash_id)
reftable_free(names[i]);
}
reftable_free(names);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
}
static void test_table_read_write_seek_linear(void)
@@ -611,7 +611,7 @@ static void test_table_refs_for(int indexed)
strbuf_release(&buf);
free_names(want_names);
reftable_iterator_destroy(&it);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
}
static void test_table_refs_for_no_index(void)
@@ -657,7 +657,7 @@ static void test_write_empty_table(void)
EXPECT(err > 0);
reftable_iterator_destroy(&it);
- reftable_reader_free(rd);
+ reftable_reader_decref(rd);
strbuf_release(&buf);
}
@@ -863,7 +863,7 @@ static void test_write_multiple_indices(void)
reftable_iterator_destroy(&it);
reftable_writer_free(writer);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&writer_buf);
strbuf_release(&buf);
}
@@ -919,7 +919,7 @@ static void test_write_multi_level_index(void)
reftable_iterator_destroy(&it);
reftable_writer_free(writer);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&writer_buf);
strbuf_release(&buf);
}
diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h
index 8a05c84685..a600452b56 100644
--- a/reftable/reftable-reader.h
+++ b/reftable/reftable-reader.h
@@ -33,6 +33,18 @@ struct reftable_reader;
int reftable_reader_new(struct reftable_reader **pp,
struct reftable_block_source *src, const char *name);
+/*
+ * Manage the reference count of the reftable reader. A newly initialized
+ * reader starts with a refcount of 1 and will be deleted once the refcount has
+ * reached 0.
+ *
+ * This is required because readers may have longer lifetimes than the stack
+ * they belong to. The stack may for example be reloaded while the old tables
+ * are still being accessed by an iterator.
+ */
+void reftable_reader_incref(struct reftable_reader *reader);
+void reftable_reader_decref(struct reftable_reader *reader);
+
/* Initialize a reftable iterator for reading refs. */
void reftable_reader_init_ref_iterator(struct reftable_reader *r,
struct reftable_iterator *it);
@@ -44,9 +56,6 @@ void reftable_reader_init_log_iterator(struct reftable_reader *r,
/* returns the hash ID used in this table. */
uint32_t reftable_reader_hash_id(struct reftable_reader *r);
-/* closes and deallocates a reader. */
-void reftable_reader_free(struct reftable_reader *);
-
/* return an iterator for the refs pointing to `oid`. */
int reftable_reader_refs_for(struct reftable_reader *r,
struct reftable_iterator *it, uint8_t *oid);
diff --git a/reftable/stack.c b/reftable/stack.c
index 0ac9cdf8de..8e85f8b4d9 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -186,7 +186,7 @@ void reftable_stack_destroy(struct reftable_stack *st)
if (names && !has_name(names, name)) {
stack_filename(&filename, st, name);
}
- reftable_reader_free(st->readers[i]);
+ reftable_reader_decref(st->readers[i]);
if (filename.len) {
/* On Windows, can only unlink after closing. */
@@ -290,7 +290,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
const char *name = reader_name(cur[i]);
stack_filename(&table_path, st, name);
- reftable_reader_free(cur[i]);
+ reftable_reader_decref(cur[i]);
/* On Windows, can only unlink after closing. */
unlink(table_path.buf);
@@ -299,7 +299,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
done:
for (i = 0; i < new_readers_len; i++)
- reftable_reader_free(new_readers[i]);
+ reftable_reader_decref(new_readers[i]);
reftable_free(new_readers);
reftable_free(cur);
strbuf_release(&table_path);
@@ -1534,7 +1534,7 @@ static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
goto done;
update_idx = reftable_reader_max_update_index(rd);
- reftable_reader_free(rd);
+ reftable_reader_decref(rd);
if (update_idx <= max) {
unlink(table_path.buf);
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index de0669b7b8..bc3bf77274 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -1036,10 +1036,8 @@ static void test_reftable_stack_compaction_concurrent(void)
static void unclean_stack_close(struct reftable_stack *st)
{
/* break abstraction boundary to simulate unclean shutdown. */
- int i = 0;
- for (; i < st->readers_len; i++) {
- reftable_reader_free(st->readers[i]);
- }
+ for (size_t i = 0; i < st->readers_len; i++)
+ reftable_reader_decref(st->readers[i]);
st->readers_len = 0;
FREE_AND_NULL(st->readers);
}
diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c
index 87c2f42aae..f6d855a9d7 100644
--- a/t/helper/test-reftable.c
+++ b/t/helper/test-reftable.c
@@ -152,7 +152,7 @@ static int dump_reftable(const char *tablename)
done:
reftable_merged_table_free(mt);
- reftable_reader_free(r);
+ reftable_reader_decref(r);
return err;
}
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 8f51aca1b6..081d3c8b69 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -115,7 +115,7 @@ merged_table_from_records(struct reftable_ref_record **refs,
static void readers_destroy(struct reftable_reader **readers, const size_t n)
{
for (size_t i = 0; i < n; i++)
- reftable_reader_free(readers[i]);
+ reftable_reader_decref(readers[i]);
reftable_free(readers);
}
@@ -437,7 +437,7 @@ static void t_default_write_opts(void)
err = reftable_merged_table_new(&merged, &rd, 1, GIT_SHA1_FORMAT_ID);
check(!err);
- reftable_reader_free(rd);
+ reftable_reader_decref(rd);
reftable_merged_table_free(merged);
strbuf_release(&buf);
}
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH 07/10] reftable/reader: introduce refcounting
2024-08-19 15:40 ` [PATCH 07/10] reftable/reader: introduce refcounting Patrick Steinhardt
@ 2024-08-22 9:47 ` karthik nayak
2024-08-22 11:35 ` Patrick Steinhardt
0 siblings, 1 reply; 38+ messages in thread
From: karthik nayak @ 2024-08-22 9:47 UTC (permalink / raw)
To: Patrick Steinhardt, git; +Cc: Jeff King
[-- Attachment #1: Type: text/plain, Size: 6631 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> It was recently reported that concurrent reads and writes may cause the
> reftable backend to segfault. The root cause of this is that we do not
> properly keep track of reftable readers across reloads.
>
> Suppose that you have a reftable iterator and then decide to reload the
> stack while iterating through the iterator. When the stack has been
> rewritten since we have created the iterator, then we would end up
> discarding a subset of readers that may still be in use by the iterator.
> The consequence is that we now try to reference deallocated memory,
> which of course segfaults.
>
> One way to trigger this is in t5616, where some background maintenance
> jobs have been leaking from one test into another. This leads to stack
> traces like the following one:
>
> + git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 --refetch origin
> AddressSanitizer:DEADLYSIGNAL
> =================================================================
> ==657994==ERROR: AddressSanitizer: SEGV on unknown address 0x7fa0f0ec6089 (pc 0x55f23e52ddf9 bp
> 0x7ffe7bfa1700 sp 0x7ffe7bfa1700 T0)
> ==657994==The signal is caused by a READ memory access.
> #0 0x55f23e52ddf9 in get_var_int reftable/record.c:29
> #1 0x55f23e53295e in reftable_decode_keylen reftable/record.c:170
> #2 0x55f23e532cc0 in reftable_decode_key reftable/record.c:194
> #3 0x55f23e54e72e in block_iter_next reftable/block.c:398
> #4 0x55f23e5573dc in table_iter_next_in_block reftable/reader.c:240
> #5 0x55f23e5573dc in table_iter_next reftable/reader.c:355
> #6 0x55f23e5573dc in table_iter_next reftable/reader.c:339
> #7 0x55f23e551283 in merged_iter_advance_subiter reftable/merged.c:69
> #8 0x55f23e55169e in merged_iter_next_entry reftable/merged.c:123
> #9 0x55f23e55169e in merged_iter_next_void reftable/merged.c:172
> #10 0x55f23e537625 in reftable_iterator_next_ref reftable/generic.c:175
> #11 0x55f23e2cf9c6 in reftable_ref_iterator_advance refs/reftable-backend.c:464
> #12 0x55f23e2d996e in ref_iterator_advance refs/iterator.c:13
> #13 0x55f23e2d996e in do_for_each_ref_iterator refs/iterator.c:452
> #14 0x55f23dca6767 in get_ref_map builtin/fetch.c:623
> #15 0x55f23dca6767 in do_fetch builtin/fetch.c:1659
> #16 0x55f23dca6767 in fetch_one builtin/fetch.c:2133
> #17 0x55f23dca6767 in cmd_fetch builtin/fetch.c:2432
> #18 0x55f23dba7764 in run_builtin git.c:484
> #19 0x55f23dba7764 in handle_builtin git.c:741
> #20 0x55f23dbab61e in run_argv git.c:805
> #21 0x55f23dbab61e in cmd_main git.c:1000
> #22 0x55f23dba4781 in main common-main.c:64
> #23 0x7fa0f063fc89 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
> #24 0x7fa0f063fd44 in __libc_start_main_impl ../csu/libc-start.c:360
> #25 0x55f23dba6ad0 in _start (git+0xadfad0) (BuildId: 803b2b7f59beb03d7849fb8294a8e2145dd4aa27)
>
The stacktrace is for iterating over refs, what I don't understand is
where in this flow do we actually reload the stack.
> While it is somewhat awkward that the maintenance processes survive
> tests in the first place, it is totally expected that reftables should
> work alright with concurrent writers. Seemingly they don't.
>
> The only underlying resource that we need to care about in this context
> is the reftable reader, which is responsible for reading a single table
> from disk. These readers get discarded immediately (unless reused) when
> calling `reftable_stack_reload()`, which is wrong. We can only close
> them once we know that there are no iterators using them anymore.
>
> Prepare for a fix by converting the reftable readers to be refcounted.
>
Okay so my understanding is that `refcounted` refers to a reference
count which keeps tracks of the stacks which are referring to the
reader. The name is also used in `struct blame_origin` in blame.{c,h}.
Makes a lot more sense now :)
> Reported-by: Jeff King <peff@peff.net>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> reftable/reader.c | 16 ++++++++++++++--
> reftable/reader.h | 2 ++
> reftable/readwrite_test.c | 18 +++++++++---------
> reftable/reftable-reader.h | 15 ++++++++++++---
> reftable/stack.c | 8 ++++----
> reftable/stack_test.c | 6 ++----
> t/helper/test-reftable.c | 2 +-
> t/unit-tests/t-reftable-merged.c | 4 ++--
> 8 files changed, 46 insertions(+), 25 deletions(-)
>
> diff --git a/reftable/reader.c b/reftable/reader.c
> index 037417fcf6..64a0953e68 100644
> --- a/reftable/reader.c
> +++ b/reftable/reader.c
> @@ -621,6 +621,7 @@ int reftable_reader_new(struct reftable_reader **out,
> r->source = *source;
> r->name = xstrdup(name);
> r->hash_id = 0;
> + r->refcount = 1;
>
So when the reader is initialized by someone, this sets the counter to one.
> err = block_source_read_block(source, &footer, r->size,
> footer_size(r->version));
> @@ -645,10 +646,21 @@ int reftable_reader_new(struct reftable_reader **out,
> return err;
> }
>
> -void reftable_reader_free(struct reftable_reader *r)
> +void reftable_reader_incref(struct reftable_reader *r)
> +{
> + if (!r->refcount)
> + BUG("cannot increment ref counter of dead reader");
> + r->refcount++;
> +}
> +
> +void reftable_reader_decref(struct reftable_reader *r)
> {
> if (!r)
> return;
> + if (!r->refcount)
> + BUG("cannot decrement ref counter of dead reader");
> + if (--r->refcount)
> + return;
> block_source_close(&r->source);
> FREE_AND_NULL(r->name);
> reftable_free(r);
> @@ -812,7 +824,7 @@ int reftable_reader_print_blocks(const char *tablename)
> }
>
> done:
> - reftable_reader_free(r);
> + reftable_reader_decref(r);
> table_iter_close(&ti);
> return err;
> }
>
We remove the `_free()` function and instead introduce `_incref()` and
`_decref()` to increase and decrease the counter. The latter takes over
the functionality of `_free()` when counter hits 0.
> diff --git a/reftable/reader.h b/reftable/reader.h
> index 88b4f3b421..3710ee09b4 100644
> --- a/reftable/reader.h
> +++ b/reftable/reader.h
> @@ -50,6 +50,8 @@ struct reftable_reader {
> struct reftable_reader_offsets ref_offsets;
> struct reftable_reader_offsets obj_offsets;
> struct reftable_reader_offsets log_offsets;
> +
> + uint64_t refcount;
Wonder if there is a chance that we decrement refcount from 0 and hence
cause a wraparound.
[snip]
I have some questions regarding my own understanding, but the patch
looks good to me.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 07/10] reftable/reader: introduce refcounting
2024-08-22 9:47 ` karthik nayak
@ 2024-08-22 11:35 ` Patrick Steinhardt
2024-08-23 10:14 ` karthik nayak
0 siblings, 1 reply; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-22 11:35 UTC (permalink / raw)
To: karthik nayak; +Cc: git, Jeff King
On Thu, Aug 22, 2024 at 02:47:43AM -0700, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> > It was recently reported that concurrent reads and writes may cause the
> > reftable backend to segfault. The root cause of this is that we do not
> > properly keep track of reftable readers across reloads.
> >
> > Suppose that you have a reftable iterator and then decide to reload the
> > stack while iterating through the iterator. When the stack has been
> > rewritten since we have created the iterator, then we would end up
> > discarding a subset of readers that may still be in use by the iterator.
> > The consequence is that we now try to reference deallocated memory,
> > which of course segfaults.
> >
> > One way to trigger this is in t5616, where some background maintenance
> > jobs have been leaking from one test into another. This leads to stack
> > traces like the following one:
> >
> > + git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 --refetch origin
> > AddressSanitizer:DEADLYSIGNAL
> > =================================================================
> > ==657994==ERROR: AddressSanitizer: SEGV on unknown address 0x7fa0f0ec6089 (pc 0x55f23e52ddf9 bp
> > 0x7ffe7bfa1700 sp 0x7ffe7bfa1700 T0)
> > ==657994==The signal is caused by a READ memory access.
> > #0 0x55f23e52ddf9 in get_var_int reftable/record.c:29
> > #1 0x55f23e53295e in reftable_decode_keylen reftable/record.c:170
> > #2 0x55f23e532cc0 in reftable_decode_key reftable/record.c:194
> > #3 0x55f23e54e72e in block_iter_next reftable/block.c:398
> > #4 0x55f23e5573dc in table_iter_next_in_block reftable/reader.c:240
> > #5 0x55f23e5573dc in table_iter_next reftable/reader.c:355
> > #6 0x55f23e5573dc in table_iter_next reftable/reader.c:339
> > #7 0x55f23e551283 in merged_iter_advance_subiter reftable/merged.c:69
> > #8 0x55f23e55169e in merged_iter_next_entry reftable/merged.c:123
> > #9 0x55f23e55169e in merged_iter_next_void reftable/merged.c:172
> > #10 0x55f23e537625 in reftable_iterator_next_ref reftable/generic.c:175
> > #11 0x55f23e2cf9c6 in reftable_ref_iterator_advance refs/reftable-backend.c:464
> > #12 0x55f23e2d996e in ref_iterator_advance refs/iterator.c:13
> > #13 0x55f23e2d996e in do_for_each_ref_iterator refs/iterator.c:452
> > #14 0x55f23dca6767 in get_ref_map builtin/fetch.c:623
> > #15 0x55f23dca6767 in do_fetch builtin/fetch.c:1659
> > #16 0x55f23dca6767 in fetch_one builtin/fetch.c:2133
> > #17 0x55f23dca6767 in cmd_fetch builtin/fetch.c:2432
> > #18 0x55f23dba7764 in run_builtin git.c:484
> > #19 0x55f23dba7764 in handle_builtin git.c:741
> > #20 0x55f23dbab61e in run_argv git.c:805
> > #21 0x55f23dbab61e in cmd_main git.c:1000
> > #22 0x55f23dba4781 in main common-main.c:64
> > #23 0x7fa0f063fc89 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
> > #24 0x7fa0f063fd44 in __libc_start_main_impl ../csu/libc-start.c:360
> > #25 0x55f23dba6ad0 in _start (git+0xadfad0) (BuildId: 803b2b7f59beb03d7849fb8294a8e2145dd4aa27)
> >
>
> The stacktrace is for iterating over refs, what I don't understand is
> where in this flow do we actually reload the stack.
Basically, whenever you call into the reftable backend we check whether
we need to reload the stack. So, when creating a reftable iterator,
reading a single ref, writing refs and so on. So in the above code flow
we had a ref iterator, but during iteration we ended up reading other
refs, as well.
> > While it is somewhat awkward that the maintenance processes survive
> > tests in the first place, it is totally expected that reftables should
> > work alright with concurrent writers. Seemingly they don't.
> >
> > The only underlying resource that we need to care about in this context
> > is the reftable reader, which is responsible for reading a single table
> > from disk. These readers get discarded immediately (unless reused) when
> > calling `reftable_stack_reload()`, which is wrong. We can only close
> > them once we know that there are no iterators using them anymore.
> >
> > Prepare for a fix by converting the reftable readers to be refcounted.
> >
>
> Okay so my understanding is that `refcounted` refers to a reference
> count which keeps tracks of the stacks which are referring to the
> reader. The name is also used in `struct blame_origin` in blame.{c,h}.
> Makes a lot more sense now :)
Yup.
> > diff --git a/reftable/reader.h b/reftable/reader.h
> > index 88b4f3b421..3710ee09b4 100644
> > --- a/reftable/reader.h
> > +++ b/reftable/reader.h
> > @@ -50,6 +50,8 @@ struct reftable_reader {
> > struct reftable_reader_offsets ref_offsets;
> > struct reftable_reader_offsets obj_offsets;
> > struct reftable_reader_offsets log_offsets;
> > +
> > + uint64_t refcount;
>
> Wonder if there is a chance that we decrement refcount from 0 and hence
> cause a wraparound.
This should never happen in practice. And if it does, we would hit a
BUG():
void reftable_reader_decref(struct reftable_reader *r)
{
if (!r)
return;
if (!r->refcount)
BUG("cannot decrement ref counter of dead reader");
if (--r->refcount)
return;
block_source_close(&r->source);
FREE_AND_NULL(r->name);
reftable_free(r);
}
If the refcount is at zero already, we hit the bug.
Patrick
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 07/10] reftable/reader: introduce refcounting
2024-08-22 11:35 ` Patrick Steinhardt
@ 2024-08-23 10:14 ` karthik nayak
0 siblings, 0 replies; 38+ messages in thread
From: karthik nayak @ 2024-08-23 10:14 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Jeff King
[-- Attachment #1: Type: text/plain, Size: 5696 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> On Thu, Aug 22, 2024 at 02:47:43AM -0700, karthik nayak wrote:
>> Patrick Steinhardt <ps@pks.im> writes:
>>
>> > It was recently reported that concurrent reads and writes may cause the
>> > reftable backend to segfault. The root cause of this is that we do not
>> > properly keep track of reftable readers across reloads.
>> >
>> > Suppose that you have a reftable iterator and then decide to reload the
>> > stack while iterating through the iterator. When the stack has been
>> > rewritten since we have created the iterator, then we would end up
>> > discarding a subset of readers that may still be in use by the iterator.
>> > The consequence is that we now try to reference deallocated memory,
>> > which of course segfaults.
>> >
>> > One way to trigger this is in t5616, where some background maintenance
>> > jobs have been leaking from one test into another. This leads to stack
>> > traces like the following one:
>> >
>> > + git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 --refetch origin
>> > AddressSanitizer:DEADLYSIGNAL
>> > =================================================================
>> > ==657994==ERROR: AddressSanitizer: SEGV on unknown address 0x7fa0f0ec6089 (pc 0x55f23e52ddf9 bp
>> > 0x7ffe7bfa1700 sp 0x7ffe7bfa1700 T0)
>> > ==657994==The signal is caused by a READ memory access.
>> > #0 0x55f23e52ddf9 in get_var_int reftable/record.c:29
>> > #1 0x55f23e53295e in reftable_decode_keylen reftable/record.c:170
>> > #2 0x55f23e532cc0 in reftable_decode_key reftable/record.c:194
>> > #3 0x55f23e54e72e in block_iter_next reftable/block.c:398
>> > #4 0x55f23e5573dc in table_iter_next_in_block reftable/reader.c:240
>> > #5 0x55f23e5573dc in table_iter_next reftable/reader.c:355
>> > #6 0x55f23e5573dc in table_iter_next reftable/reader.c:339
>> > #7 0x55f23e551283 in merged_iter_advance_subiter reftable/merged.c:69
>> > #8 0x55f23e55169e in merged_iter_next_entry reftable/merged.c:123
>> > #9 0x55f23e55169e in merged_iter_next_void reftable/merged.c:172
>> > #10 0x55f23e537625 in reftable_iterator_next_ref reftable/generic.c:175
>> > #11 0x55f23e2cf9c6 in reftable_ref_iterator_advance refs/reftable-backend.c:464
>> > #12 0x55f23e2d996e in ref_iterator_advance refs/iterator.c:13
>> > #13 0x55f23e2d996e in do_for_each_ref_iterator refs/iterator.c:452
>> > #14 0x55f23dca6767 in get_ref_map builtin/fetch.c:623
>> > #15 0x55f23dca6767 in do_fetch builtin/fetch.c:1659
>> > #16 0x55f23dca6767 in fetch_one builtin/fetch.c:2133
>> > #17 0x55f23dca6767 in cmd_fetch builtin/fetch.c:2432
>> > #18 0x55f23dba7764 in run_builtin git.c:484
>> > #19 0x55f23dba7764 in handle_builtin git.c:741
>> > #20 0x55f23dbab61e in run_argv git.c:805
>> > #21 0x55f23dbab61e in cmd_main git.c:1000
>> > #22 0x55f23dba4781 in main common-main.c:64
>> > #23 0x7fa0f063fc89 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
>> > #24 0x7fa0f063fd44 in __libc_start_main_impl ../csu/libc-start.c:360
>> > #25 0x55f23dba6ad0 in _start (git+0xadfad0) (BuildId: 803b2b7f59beb03d7849fb8294a8e2145dd4aa27)
>> >
>>
>> The stacktrace is for iterating over refs, what I don't understand is
>> where in this flow do we actually reload the stack.
>
> Basically, whenever you call into the reftable backend we check whether
> we need to reload the stack. So, when creating a reftable iterator,
> reading a single ref, writing refs and so on. So in the above code flow
> we had a ref iterator, but during iteration we ended up reading other
> refs, as well.
>
Yes, makes sense. Thanks for clarifying.
>> > While it is somewhat awkward that the maintenance processes survive
>> > tests in the first place, it is totally expected that reftables should
>> > work alright with concurrent writers. Seemingly they don't.
>> >
>> > The only underlying resource that we need to care about in this context
>> > is the reftable reader, which is responsible for reading a single table
>> > from disk. These readers get discarded immediately (unless reused) when
>> > calling `reftable_stack_reload()`, which is wrong. We can only close
>> > them once we know that there are no iterators using them anymore.
>> >
>> > Prepare for a fix by converting the reftable readers to be refcounted.
>> >
>>
>> Okay so my understanding is that `refcounted` refers to a reference
>> count which keeps tracks of the stacks which are referring to the
>> reader. The name is also used in `struct blame_origin` in blame.{c,h}.
>> Makes a lot more sense now :)
>
> Yup.
>
>> > diff --git a/reftable/reader.h b/reftable/reader.h
>> > index 88b4f3b421..3710ee09b4 100644
>> > --- a/reftable/reader.h
>> > +++ b/reftable/reader.h
>> > @@ -50,6 +50,8 @@ struct reftable_reader {
>> > struct reftable_reader_offsets ref_offsets;
>> > struct reftable_reader_offsets obj_offsets;
>> > struct reftable_reader_offsets log_offsets;
>> > +
>> > + uint64_t refcount;
>>
>> Wonder if there is a chance that we decrement refcount from 0 and hence
>> cause a wraparound.
>
> This should never happen in practice. And if it does, we would hit a
> BUG():
>
> void reftable_reader_decref(struct reftable_reader *r)
> {
> if (!r)
> return;
> if (!r->refcount)
> BUG("cannot decrement ref counter of dead reader");
> if (--r->refcount)
> return;
> block_source_close(&r->source);
> FREE_AND_NULL(r->name);
> reftable_free(r);
> }
>
> If the refcount is at zero already, we hit the bug.
>
> Patrick
Yup, this seems correct. Thanks.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH 08/10] reftable/reader: keep readers alive during iteration
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (6 preceding siblings ...)
2024-08-19 15:40 ` [PATCH 07/10] reftable/reader: introduce refcounting Patrick Steinhardt
@ 2024-08-19 15:40 ` Patrick Steinhardt
2024-08-23 10:21 ` karthik nayak
2024-08-19 15:40 ` [PATCH 09/10] reftable/stack: reorder swapping in the reloaded stack contents Patrick Steinhardt
` (4 subsequent siblings)
12 siblings, 1 reply; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:40 UTC (permalink / raw)
To: git; +Cc: Jeff King
The lifetime of a table iterator may surive the lifetime of a reader
when the stack gets reloaded. Keep the reader from being released by
increasing its refcount while the iterator is still being used.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 2 ++
reftable/stack_test.c | 49 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/reftable/reader.c b/reftable/reader.c
index 64a0953e68..f877099087 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -175,6 +175,7 @@ static int table_iter_init(struct table_iter *ti, struct reftable_reader *r)
{
struct block_iter bi = BLOCK_ITER_INIT;
memset(ti, 0, sizeof(*ti));
+ reftable_reader_incref(r);
ti->r = r;
ti->bi = bi;
return 0;
@@ -262,6 +263,7 @@ static void table_iter_close(struct table_iter *ti)
{
table_iter_block_done(ti);
block_iter_close(&ti->bi);
+ reftable_reader_decref(ti->r);
}
static int table_iter_next_block(struct table_iter *ti)
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index bc3bf77274..91e716dc0a 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -1076,6 +1076,54 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
clear_dir(dir);
}
+static void test_reftable_stack_read_across_reload(void)
+{
+ struct reftable_write_options opts = { 0 };
+ struct reftable_stack *st1 = NULL, *st2 = NULL;
+ struct reftable_ref_record rec = { 0 };
+ struct reftable_iterator it = { 0 };
+ char *dir = get_tmp_dir(__LINE__);
+ int err;
+
+ /* Create a first stack and set up an iterator for it. */
+ err = reftable_new_stack(&st1, dir, &opts);
+ EXPECT_ERR(err);
+ write_n_ref_tables(st1, 2);
+ EXPECT(st1->merged->readers_len == 2);
+ reftable_stack_init_ref_iterator(st1, &it);
+ err = reftable_iterator_seek_ref(&it, "");
+ EXPECT_ERR(err);
+
+ /* Set up a second stack for the same directory and compact it. */
+ err = reftable_new_stack(&st2, dir, &opts);
+ EXPECT_ERR(err);
+ EXPECT(st2->merged->readers_len == 2);
+ err = reftable_stack_compact_all(st2, NULL);
+ EXPECT_ERR(err);
+
+ /*
+ * Verify that we can continue to use the old iterator even after we
+ * have reloaded its stack.
+ */
+ err = reftable_stack_reload(st1);
+ EXPECT_ERR(err);
+ EXPECT(st2->merged->readers_len == 1);
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0000"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0001"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT(err > 0);
+
+ reftable_ref_record_release(&rec);
+ reftable_iterator_destroy(&it);
+ reftable_stack_destroy(st1);
+ reftable_stack_destroy(st2);
+ clear_dir(dir);
+}
+
int stack_test_main(int argc, const char *argv[])
{
RUN_TEST(test_empty_add);
@@ -1098,6 +1146,7 @@ int stack_test_main(int argc, const char *argv[])
RUN_TEST(test_reftable_stack_auto_compaction_fails_gracefully);
RUN_TEST(test_reftable_stack_update_index_check);
RUN_TEST(test_reftable_stack_uptodate);
+ RUN_TEST(test_reftable_stack_read_across_reload);
RUN_TEST(test_suggest_compaction_segment);
RUN_TEST(test_suggest_compaction_segment_nothing);
return 0;
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH 08/10] reftable/reader: keep readers alive during iteration
2024-08-19 15:40 ` [PATCH 08/10] reftable/reader: keep readers alive during iteration Patrick Steinhardt
@ 2024-08-23 10:21 ` karthik nayak
2024-08-23 13:42 ` Patrick Steinhardt
0 siblings, 1 reply; 38+ messages in thread
From: karthik nayak @ 2024-08-23 10:21 UTC (permalink / raw)
To: Patrick Steinhardt, git; +Cc: Jeff King
[-- Attachment #1: Type: text/plain, Size: 3789 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> The lifetime of a table iterator may surive the lifetime of a reader
s/surive/survive
> when the stack gets reloaded. Keep the reader from being released by
> increasing its refcount while the iterator is still being used.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> reftable/reader.c | 2 ++
> reftable/stack_test.c | 49 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 51 insertions(+)
>
> diff --git a/reftable/reader.c b/reftable/reader.c
> index 64a0953e68..f877099087 100644
> --- a/reftable/reader.c
> +++ b/reftable/reader.c
> @@ -175,6 +175,7 @@ static int table_iter_init(struct table_iter *ti, struct reftable_reader *r)
> {
> struct block_iter bi = BLOCK_ITER_INIT;
> memset(ti, 0, sizeof(*ti));
> + reftable_reader_incref(r);
> ti->r = r;
> ti->bi = bi;
> return 0;
> @@ -262,6 +263,7 @@ static void table_iter_close(struct table_iter *ti)
> {
> table_iter_block_done(ti);
> block_iter_close(&ti->bi);
> + reftable_reader_decref(ti->r);
> }
>
> static int table_iter_next_block(struct table_iter *ti)
> diff --git a/reftable/stack_test.c b/reftable/stack_test.c
> index bc3bf77274..91e716dc0a 100644
> --- a/reftable/stack_test.c
> +++ b/reftable/stack_test.c
> @@ -1076,6 +1076,54 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
> clear_dir(dir);
> }
>
> +static void test_reftable_stack_read_across_reload(void)
> +{
> + struct reftable_write_options opts = { 0 };
> + struct reftable_stack *st1 = NULL, *st2 = NULL;
> + struct reftable_ref_record rec = { 0 };
> + struct reftable_iterator it = { 0 };
> + char *dir = get_tmp_dir(__LINE__);
> + int err;
> +
> + /* Create a first stack and set up an iterator for it. */
> + err = reftable_new_stack(&st1, dir, &opts);
> + EXPECT_ERR(err);
> + write_n_ref_tables(st1, 2);
> + EXPECT(st1->merged->readers_len == 2);
> + reftable_stack_init_ref_iterator(st1, &it);
> + err = reftable_iterator_seek_ref(&it, "");
> + EXPECT_ERR(err);
> +
> + /* Set up a second stack for the same directory and compact it. */
> + err = reftable_new_stack(&st2, dir, &opts);
> + EXPECT_ERR(err);
> + EXPECT(st2->merged->readers_len == 2);
> + err = reftable_stack_compact_all(st2, NULL);
Shouldn't we verify that `EXPECT(st2->merged->readers_len == 1);` here?
> + EXPECT_ERR(err);
> +
> + /*
> + * Verify that we can continue to use the old iterator even after we
> + * have reloaded its stack.
> + */
> + err = reftable_stack_reload(st1);
> + EXPECT_ERR(err);
> + EXPECT(st2->merged->readers_len == 1);
Oh we do it here, I would've expected it above the `st1` reload. And
probably expected `EXPECT(st1->merged->readers_len == 2);` here to
confirm that we still have the readers.
> + err = reftable_iterator_next_ref(&it, &rec);
> + EXPECT_ERR(err);
> + EXPECT(!strcmp(rec.refname, "refs/heads/branch-0000"));
> + err = reftable_iterator_next_ref(&it, &rec);
> + EXPECT_ERR(err);
> + EXPECT(!strcmp(rec.refname, "refs/heads/branch-0001"));
> + err = reftable_iterator_next_ref(&it, &rec);
> + EXPECT(err > 0);
> +
> + reftable_ref_record_release(&rec);
> + reftable_iterator_destroy(&it);
> + reftable_stack_destroy(st1);
> + reftable_stack_destroy(st2);
> + clear_dir(dir);
> +}
> +
> int stack_test_main(int argc, const char *argv[])
> {
> RUN_TEST(test_empty_add);
> @@ -1098,6 +1146,7 @@ int stack_test_main(int argc, const char *argv[])
> RUN_TEST(test_reftable_stack_auto_compaction_fails_gracefully);
> RUN_TEST(test_reftable_stack_update_index_check);
> RUN_TEST(test_reftable_stack_uptodate);
> + RUN_TEST(test_reftable_stack_read_across_reload);
> RUN_TEST(test_suggest_compaction_segment);
> RUN_TEST(test_suggest_compaction_segment_nothing);
> return 0;
> --
> 2.46.0.164.g477ce5ccd6.dirty
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 08/10] reftable/reader: keep readers alive during iteration
2024-08-23 10:21 ` karthik nayak
@ 2024-08-23 13:42 ` Patrick Steinhardt
0 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 13:42 UTC (permalink / raw)
To: karthik nayak; +Cc: git, Jeff King
On Fri, Aug 23, 2024 at 03:21:27AM -0700, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> > diff --git a/reftable/stack_test.c b/reftable/stack_test.c
> > index bc3bf77274..91e716dc0a 100644
> > --- a/reftable/stack_test.c
> > +++ b/reftable/stack_test.c
> > @@ -1076,6 +1076,54 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
> > clear_dir(dir);
> > }
> >
> > +static void test_reftable_stack_read_across_reload(void)
> > +{
> > + struct reftable_write_options opts = { 0 };
> > + struct reftable_stack *st1 = NULL, *st2 = NULL;
> > + struct reftable_ref_record rec = { 0 };
> > + struct reftable_iterator it = { 0 };
> > + char *dir = get_tmp_dir(__LINE__);
> > + int err;
> > +
> > + /* Create a first stack and set up an iterator for it. */
> > + err = reftable_new_stack(&st1, dir, &opts);
> > + EXPECT_ERR(err);
> > + write_n_ref_tables(st1, 2);
> > + EXPECT(st1->merged->readers_len == 2);
> > + reftable_stack_init_ref_iterator(st1, &it);
> > + err = reftable_iterator_seek_ref(&it, "");
> > + EXPECT_ERR(err);
> > +
> > + /* Set up a second stack for the same directory and compact it. */
> > + err = reftable_new_stack(&st2, dir, &opts);
> > + EXPECT_ERR(err);
> > + EXPECT(st2->merged->readers_len == 2);
> > + err = reftable_stack_compact_all(st2, NULL);
>
> Shouldn't we verify that `EXPECT(st2->merged->readers_len == 1);` here?
Yes, we should.
> > + EXPECT_ERR(err);
> > +
> > + /*
> > + * Verify that we can continue to use the old iterator even after we
> > + * have reloaded its stack.
> > + */
> > + err = reftable_stack_reload(st1);
> > + EXPECT_ERR(err);
> > + EXPECT(st2->merged->readers_len == 1);
>
> Oh we do it here, I would've expected it above the `st1` reload. And
> probably expected `EXPECT(st1->merged->readers_len == 2);` here to
> confirm that we still have the readers.
And yes, this was a typo, it should've been `st1` indeed. Good eyes,
thanks!
Patrick
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH 09/10] reftable/stack: reorder swapping in the reloaded stack contents
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (7 preceding siblings ...)
2024-08-19 15:40 ` [PATCH 08/10] reftable/reader: keep readers alive during iteration Patrick Steinhardt
@ 2024-08-19 15:40 ` Patrick Steinhardt
2024-08-19 15:40 ` [PATCH 10/10] reftable/stack: fix segfault when reload with reused readers fails Patrick Steinhardt
` (3 subsequent siblings)
12 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:40 UTC (permalink / raw)
To: git; +Cc: Jeff King
The code flow of how we swap in the reloaded stack contents is somewhat
convoluted because we switch back and forth between swapping in
different parts of the stack.
Reorder the code to simplify it. We now first close and unlink the old
tables which do not get reused before we update the stack to point to
the new stack.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack.c | 34 +++++++++++++++++++---------------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/reftable/stack.c b/reftable/stack.c
index 8e85f8b4d9..0247222258 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -273,30 +273,34 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
if (err < 0)
goto done;
- st->readers_len = new_readers_len;
- if (st->merged)
- reftable_merged_table_free(st->merged);
- if (st->readers) {
- reftable_free(st->readers);
- }
- st->readers = new_readers;
- new_readers = NULL;
- new_readers_len = 0;
-
- new_merged->suppress_deletions = 1;
- st->merged = new_merged;
+ /*
+ * Close the old, non-reused readers and proactively try to unlink
+ * them. This is done for systems like Windows, where the underlying
+ * file of such an open reader wouldn't have been possible to be
+ * unlinked by the compacting process.
+ */
for (i = 0; i < cur_len; i++) {
if (cur[i]) {
const char *name = reader_name(cur[i]);
stack_filename(&table_path, st, name);
-
reftable_reader_decref(cur[i]);
-
- /* On Windows, can only unlink after closing. */
unlink(table_path.buf);
}
}
+ /* Update the stack to point to the new tables. */
+ if (st->merged)
+ reftable_merged_table_free(st->merged);
+ new_merged->suppress_deletions = 1;
+ st->merged = new_merged;
+
+ if (st->readers)
+ reftable_free(st->readers);
+ st->readers = new_readers;
+ st->readers_len = new_readers_len;
+ new_readers = NULL;
+ new_readers_len = 0;
+
done:
for (i = 0; i < new_readers_len; i++)
reftable_reader_decref(new_readers[i]);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 10/10] reftable/stack: fix segfault when reload with reused readers fails
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (8 preceding siblings ...)
2024-08-19 15:40 ` [PATCH 09/10] reftable/stack: reorder swapping in the reloaded stack contents Patrick Steinhardt
@ 2024-08-19 15:40 ` Patrick Steinhardt
2024-08-22 8:13 ` [PATCH 00/10] reftable: fix reload with active iterators karthik nayak
` (2 subsequent siblings)
12 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-19 15:40 UTC (permalink / raw)
To: git; +Cc: Jeff King
It is expected that reloading the stack fails with concurrent writers,
e.g. because a table that we just wanted to read just got compacted.
In case we decided to reuse readers this will cause a segfault though
because we unconditionally release all new readers, including the reused
ones. As those are still referenced by the current stack, the result is
that we will eventually try to dereference those already-freed readers.
Fix this bug by incrementing the refcount of reused readers temporarily.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack.c | 23 +++++++++++++++++
reftable/stack_test.c | 59 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+)
diff --git a/reftable/stack.c b/reftable/stack.c
index 0247222258..ce0a35216b 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -226,6 +226,8 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
{
size_t cur_len = !st->merged ? 0 : st->merged->readers_len;
struct reftable_reader **cur = stack_copy_readers(st, cur_len);
+ struct reftable_reader **reused = NULL;
+ size_t reused_len = 0, reused_alloc = 0;
size_t names_len = names_length(names);
struct reftable_reader **new_readers =
reftable_calloc(names_len, sizeof(*new_readers));
@@ -245,6 +247,18 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
rd = cur[i];
cur[i] = NULL;
+
+ /*
+ * When reloading the stack fails, we end up
+ * releasing all new readers. This also
+ * includes the reused readers, even though
+ * they are still in used by the old stack. We
+ * thus need to keep them alive here, which we
+ * do by bumping their refcount.
+ */
+ REFTABLE_ALLOC_GROW(reused, reused_len + 1, reused_alloc);
+ reused[reused_len++] = rd;
+ reftable_reader_incref(rd);
break;
}
}
@@ -301,10 +315,19 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
new_readers = NULL;
new_readers_len = 0;
+ /*
+ * Decrement the refcount of reused readers again. This only needs to
+ * happen on the successful case, because on the unsuccessful one we
+ * decrement their refcount via `new_readers`.
+ */
+ for (i = 0; i < reused_len; i++)
+ reftable_reader_decref(reused[i]);
+
done:
for (i = 0; i < new_readers_len; i++)
reftable_reader_decref(new_readers[i]);
reftable_free(new_readers);
+ reftable_free(reused);
reftable_free(cur);
strbuf_release(&table_path);
return err;
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 91e716dc0a..30cdb1739c 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -10,6 +10,7 @@ license that can be found in the LICENSE file or at
#include "system.h"
+#include "copy.h"
#include "reftable-reader.h"
#include "merged.h"
#include "basics.h"
@@ -1124,6 +1125,63 @@ static void test_reftable_stack_read_across_reload(void)
clear_dir(dir);
}
+static void test_reftable_stack_reload_with_missing_table(void)
+{
+ struct reftable_write_options opts = { 0 };
+ struct reftable_stack *st = NULL;
+ struct reftable_ref_record rec = { 0 };
+ struct reftable_iterator it = { 0 };
+ struct strbuf table_path = STRBUF_INIT, content = STRBUF_INIT;
+ char *dir = get_tmp_dir(__LINE__);
+ int err;
+
+ /* Create a first stack and set up an iterator for it. */
+ err = reftable_new_stack(&st, dir, &opts);
+ EXPECT_ERR(err);
+ write_n_ref_tables(st, 2);
+ EXPECT(st->merged->readers_len == 2);
+ reftable_stack_init_ref_iterator(st, &it);
+ err = reftable_iterator_seek_ref(&it, "");
+ EXPECT_ERR(err);
+
+ /*
+ * Update the tables.list file with some garbage data, while reusing
+ * our old readers. This should trigger a partial reload of the stack,
+ * where we try to reuse our old readers.
+ */
+ strbuf_addf(&content, "%s\n", st->readers[0]->name);
+ strbuf_addf(&content, "%s\n", st->readers[1]->name);
+ strbuf_addstr(&content, "garbage\n");
+ strbuf_addf(&table_path, "%s.lock", st->list_file);
+ write_file_buf(table_path.buf, content.buf, content.len);
+ err = rename(table_path.buf, st->list_file);
+ EXPECT_ERR(err);
+
+ err = reftable_stack_reload(st);
+ EXPECT(err == -4);
+ EXPECT(st->merged->readers_len == 2);
+
+ /*
+ * Even though the reload has failed, we should be able to continue
+ * using the iterator.
+ */
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0000"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0001"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT(err > 0);
+
+ reftable_ref_record_release(&rec);
+ reftable_iterator_destroy(&it);
+ reftable_stack_destroy(st);
+ strbuf_release(&table_path);
+ strbuf_release(&content);
+ clear_dir(dir);
+}
+
int stack_test_main(int argc, const char *argv[])
{
RUN_TEST(test_empty_add);
@@ -1147,6 +1205,7 @@ int stack_test_main(int argc, const char *argv[])
RUN_TEST(test_reftable_stack_update_index_check);
RUN_TEST(test_reftable_stack_uptodate);
RUN_TEST(test_reftable_stack_read_across_reload);
+ RUN_TEST(test_reftable_stack_reload_with_missing_table);
RUN_TEST(test_suggest_compaction_segment);
RUN_TEST(test_suggest_compaction_segment_nothing);
return 0;
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH 00/10] reftable: fix reload with active iterators
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (9 preceding siblings ...)
2024-08-19 15:40 ` [PATCH 10/10] reftable/stack: fix segfault when reload with reused readers fails Patrick Steinhardt
@ 2024-08-22 8:13 ` karthik nayak
2024-08-22 12:41 ` Jeff King
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
12 siblings, 0 replies; 38+ messages in thread
From: karthik nayak @ 2024-08-22 8:13 UTC (permalink / raw)
To: Patrick Steinhardt, git; +Cc: Jeff King
[-- Attachment #1: Type: text/plain, Size: 1535 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> Hi,
>
> as reported by Peff in [1], the reftable backend fails in t5616 in a
> racy manner. The cause of this is that git-maintenance(1) processes leak
> into subsequent tests and perform concurrent compaction of the stack.
> The reftable backend is expected to handle this gracefully, but doesn't.
>
I've not looked into the recent changes made to `git-maintenance(1)` but
my understanding is that, now, `git-maintenance(1)` can detach itself
without relying on `git-gc(1)`'s detach.
> The issue can surface whenever reloading the reftable stack while an
> iterator is active. In case some of the tables got compacted, we will
> end up closing them and thus the iterator now tries to use those closed
> tables.
>
Make sense till here.
> This patch series fixes that issue by starting to refcount the readers.
Not sure what you mean by 'refcount' here though.
> Each iterator will bump the refcount, thus avoiding the problem. While
> at it, I also found a second issue where we segfault when reloading a
> table fails while reusing one of the table readers. In this scenario, we
> would end up releasing the reader of the stack itself, even though it
> would still be used by it.
>
> This patch series addresses those issues by making the reftable reader
> refcounted. The first 6 patches do some simplifications of code which is
> close. The remaining 4 ones introduce refcounting and wire it up as
> required.
>
Perhaps it becomes more clearer as I read through the patches.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 00/10] reftable: fix reload with active iterators
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (10 preceding siblings ...)
2024-08-22 8:13 ` [PATCH 00/10] reftable: fix reload with active iterators karthik nayak
@ 2024-08-22 12:41 ` Jeff King
2024-08-22 12:53 ` Patrick Steinhardt
2024-08-22 15:11 ` Junio C Hamano
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
12 siblings, 2 replies; 38+ messages in thread
From: Jeff King @ 2024-08-22 12:41 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git
On Mon, Aug 19, 2024 at 05:39:38PM +0200, Patrick Steinhardt wrote:
> This patch series fixes that issue by starting to refcount the readers.
> Each iterator will bump the refcount, thus avoiding the problem. While
> at it, I also found a second issue where we segfault when reloading a
> table fails while reusing one of the table readers. In this scenario, we
> would end up releasing the reader of the stack itself, even though it
> would still be used by it.
I gave a fairly cursory look over this, as I'm not all that familiar
with the reftable code. But everything looked pretty sensible to me.
I wondered how we might test this. It looks like you checked the
--stress output (which I confirmed seems fixed for me), along with a
synthetic test directly calling various reftable functions. Both seem
good to me.
I had hoped to be able to have a non-racy external test, where running
actual Git commands showed the segfault in a deterministic way. But I
couldn't come up with one. One trick we've used for "pausing" a reading
process is to run "cat-file --batch" from a fifo. You can ask it to do
some ref lookups, then while it waits for more input, update the
reftable, and then ask it to do more.
But I don't think that's sufficient here, as the race happens while we
are actually iterating. You'd really need some for_each_ref() callback
that blocks in some externally controllable way. Possibly you could do
something clever with partial-clone lazy fetches (where you stall the
fetch and then do ref updates in the middle), but that is getting pretty
convoluted.
So I think the tests you included seem like a good place to stop.
I did have a little trouble applying this for testing. I wanted to do it
on 'next', which has the maintenance changes to cause the race. But
merging it there ended up with a lot of conflicts with other reftable
topics (especially the tests). I was able to resolve them all, but you
might be able to make Junio's life easier by coordinating the topics a
bit.
-Peff
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 00/10] reftable: fix reload with active iterators
2024-08-22 12:41 ` Jeff King
@ 2024-08-22 12:53 ` Patrick Steinhardt
2024-08-22 15:15 ` Junio C Hamano
2024-08-22 15:11 ` Junio C Hamano
1 sibling, 1 reply; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-22 12:53 UTC (permalink / raw)
To: Jeff King; +Cc: git
On Thu, Aug 22, 2024 at 08:41:00AM -0400, Jeff King wrote:
> On Mon, Aug 19, 2024 at 05:39:38PM +0200, Patrick Steinhardt wrote:
>
> > This patch series fixes that issue by starting to refcount the readers.
> > Each iterator will bump the refcount, thus avoiding the problem. While
> > at it, I also found a second issue where we segfault when reloading a
> > table fails while reusing one of the table readers. In this scenario, we
> > would end up releasing the reader of the stack itself, even though it
> > would still be used by it.
>
> I gave a fairly cursory look over this, as I'm not all that familiar
> with the reftable code. But everything looked pretty sensible to me.
Thanks for your review, appreciated!
> I wondered how we might test this. It looks like you checked the
> --stress output (which I confirmed seems fixed for me), along with a
> synthetic test directly calling various reftable functions. Both seem
> good to me.
>
> I had hoped to be able to have a non-racy external test, where running
> actual Git commands showed the segfault in a deterministic way. But I
> couldn't come up with one. One trick we've used for "pausing" a reading
> process is to run "cat-file --batch" from a fifo. You can ask it to do
> some ref lookups, then while it waits for more input, update the
> reftable, and then ask it to do more.
>
> But I don't think that's sufficient here, as the race happens while we
> are actually iterating. You'd really need some for_each_ref() callback
> that blocks in some externally controllable way. Possibly you could do
> something clever with partial-clone lazy fetches (where you stall the
> fetch and then do ref updates in the middle), but that is getting pretty
> convoluted.
>
> So I think the tests you included seem like a good place to stop.
Yeah. I think demonstrating the issues on the API level is okayish. It
would of course have been nice to also do it via our shell scripts, but
as you say, it feels rather fragile.
> I did have a little trouble applying this for testing. I wanted to do it
> on 'next', which has the maintenance changes to cause the race. But
> merging it there ended up with a lot of conflicts with other reftable
> topics (especially the tests). I was able to resolve them all, but you
> might be able to make Junio's life easier by coordinating the topics a
> bit.
I can certainly do that. I know that there are conflicts both with the
patch series dropping the generic tables and with the patch series that
move the reftable unit tests into our own codebase. I did address the
former by basing it on top of that series, but didn't yet address the
latter.
I'm okay with waiting a bit until most of the conflicting topics land. I
guess most of them should be close to landing anyway. Alternatively, I
can also pull all of them in as dependencies.
Patrick
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 00/10] reftable: fix reload with active iterators
2024-08-22 12:53 ` Patrick Steinhardt
@ 2024-08-22 15:15 ` Junio C Hamano
2024-08-23 5:23 ` Patrick Steinhardt
0 siblings, 1 reply; 38+ messages in thread
From: Junio C Hamano @ 2024-08-22 15:15 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: Jeff King, git
Patrick Steinhardt <ps@pks.im> writes:
> I can certainly do that. I know that there are conflicts both with the
> patch series dropping the generic tables and with the patch series that
> move the reftable unit tests into our own codebase. I did address the
> former by basing it on top of that series, but didn't yet address the
> latter.
>
> I'm okay with waiting a bit until most of the conflicting topics land. I
> guess most of them should be close to landing anyway. Alternatively, I
> can also pull all of them in as dependencies.
As long as the resolution you see in 'seen' looks acceptable, I'd
rather prefer you do nothing, than a rebase that _reduces_ the
number of conflicts (but does not completely eliminate them), when I
already have a working resolution recorded in my rerere database.
Of course, updates to polish the substance of the topic are very
much appreciated, and I'll redo the resolution in such a case if
needed. That's one of the things the maintainers do.
Thanks.
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 00/10] reftable: fix reload with active iterators
2024-08-22 15:15 ` Junio C Hamano
@ 2024-08-23 5:23 ` Patrick Steinhardt
0 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 5:23 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Jeff King, git
On Thu, Aug 22, 2024 at 08:15:55AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> > I can certainly do that. I know that there are conflicts both with the
> > patch series dropping the generic tables and with the patch series that
> > move the reftable unit tests into our own codebase. I did address the
> > former by basing it on top of that series, but didn't yet address the
> > latter.
> >
> > I'm okay with waiting a bit until most of the conflicting topics land. I
> > guess most of them should be close to landing anyway. Alternatively, I
> > can also pull all of them in as dependencies.
>
> As long as the resolution you see in 'seen' looks acceptable, I'd
> rather prefer you do nothing, than a rebase that _reduces_ the
> number of conflicts (but does not completely eliminate them), when I
> already have a working resolution recorded in my rerere database.
>
> Of course, updates to polish the substance of the topic are very
> much appreciated, and I'll redo the resolution in such a case if
> needed. That's one of the things the maintainers do.
Yup, the merge in `seen` looks good to me. Thanks!
Patrick
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH 00/10] reftable: fix reload with active iterators
2024-08-22 12:41 ` Jeff King
2024-08-22 12:53 ` Patrick Steinhardt
@ 2024-08-22 15:11 ` Junio C Hamano
1 sibling, 0 replies; 38+ messages in thread
From: Junio C Hamano @ 2024-08-22 15:11 UTC (permalink / raw)
To: Jeff King; +Cc: Patrick Steinhardt, git
Jeff King <peff@peff.net> writes:
> But I don't think that's sufficient here, as the race happens while we
> are actually iterating. You'd really need some for_each_ref() callback
> that blocks in some externally controllable way. Possibly you could do
> something clever with partial-clone lazy fetches (where you stall the
> fetch and then do ref updates in the middle), but that is getting pretty
> convoluted.
;-)
> So I think the tests you included seem like a good place to stop.
Yeah, I agree with this assessment.
> I did have a little trouble applying this for testing. I wanted to do it
> on 'next', which has the maintenance changes to cause the race. But
> merging it there ended up with a lot of conflicts with other reftable
> topics (especially the tests). I was able to resolve them all, but you
> might be able to make Junio's life easier by coordinating the topics a
> bit.
I haven't tried merging the topic to 'next', jumping over other
topics in flight in 'seen'. Thanks for a heads up, but I am hoping
that my rerere database is fairly complete for this topic by now.
Thanks.
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 00/10] reftable: fix reload with active iterators
2024-08-19 15:39 [PATCH 00/10] reftable: fix reload with active iterators Patrick Steinhardt
` (11 preceding siblings ...)
2024-08-22 12:41 ` Jeff King
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 01/10] reftable/blocksource: drop malloc block source Patrick Steinhardt
` (10 more replies)
12 siblings, 11 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
Hi,
this is the second version of my patch series that fixes issues in the
reftable libary caused by having concurrent readers and writers.
Changes compared to v1:
- Remove a spurious change that renamed `init_reader()` to
`reader_init()`. That function went away in the subsequent commit
anyway.
- Fix a typo in a commit message.
- Fix some assertions in one of the added tests.
Thanks!
Patrick
Patrick Steinhardt (10):
reftable/blocksource: drop malloc block source
reftable/stack: inline `stack_compact_range_stats()`
reftable/reader: rename `reftable_new_reader()`
reftable/reader: inline `init_reader()`
reftable/reader: inline `reader_close()`
reftable/stack: fix broken refnames in `write_n_ref_tables()`
reftable/reader: introduce refcounting
reftable/reader: keep readers alive during iteration
reftable/stack: reorder swapping in the reloaded stack contents
reftable/stack: fix segfault when reload with reused readers fails
reftable/block_test.c | 3 +-
reftable/blocksource.c | 20 -----
reftable/blocksource.h | 2 -
reftable/reader.c | 149 ++++++++++++++++---------------
reftable/reader.h | 5 +-
reftable/readwrite_test.c | 85 +++++++++---------
reftable/reftable-reader.h | 19 ++--
reftable/stack.c | 90 +++++++++++--------
reftable/stack_test.c | 116 +++++++++++++++++++++++-
t/helper/test-reftable.c | 4 +-
t/unit-tests/t-reftable-merged.c | 10 +--
11 files changed, 312 insertions(+), 191 deletions(-)
Range-diff against v1:
1: fee3d3523eb = 1: fee3d3523eb reftable/blocksource: drop malloc block source
2: 3c0cf2bf46f = 2: 3c0cf2bf46f reftable/stack: inline `stack_compact_range_stats()`
3: e658b372f04 ! 3: b4cf97bf758 reftable/reader: rename `reftable_new_reader()`
@@ reftable/reader.c: int reftable_reader_print_blocks(const char *tablename)
goto done;
- ## reftable/reader.h ##
-@@ reftable/reader.h: struct reftable_reader {
- struct reftable_reader_offsets log_offsets;
- };
-
--int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
-+int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name);
- void reader_close(struct reftable_reader *r);
- const char *reader_name(struct reftable_reader *r);
-
## reftable/readwrite_test.c ##
@@ reftable/readwrite_test.c: static void test_write_empty_table(void)
4: f628b7dafb9 ! 4: 3b667097501 reftable/reader: inline `init_reader()`
@@ reftable/reader.h: struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
--int reader_init(struct reftable_reader *r, struct reftable_block_source *source,
+-int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name);
void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
5: 4a9fe150427 = 5: b129d8a8687 reftable/reader: inline `reader_close()`
6: 4965402e7bf = 6: e3b28709b5f reftable/stack: fix broken refnames in `write_n_ref_tables()`
7: fc0ed68d467 = 7: 6535d1ca9de reftable/reader: introduce refcounting
8: 02682056288 ! 8: 8d08c3bc515 reftable/reader: keep readers alive during iteration
@@ Metadata
## Commit message ##
reftable/reader: keep readers alive during iteration
- The lifetime of a table iterator may surive the lifetime of a reader
+ The lifetime of a table iterator may survive the lifetime of a reader
when the stack gets reloaded. Keep the reader from being released by
increasing its refcount while the iterator is still being used.
@@ reftable/stack_test.c: static void test_reftable_stack_compaction_concurrent_cle
+ EXPECT(st2->merged->readers_len == 2);
+ err = reftable_stack_compact_all(st2, NULL);
+ EXPECT_ERR(err);
++ EXPECT(st2->merged->readers_len == 1);
+
+ /*
+ * Verify that we can continue to use the old iterator even after we
@@ reftable/stack_test.c: static void test_reftable_stack_compaction_concurrent_cle
+ */
+ err = reftable_stack_reload(st1);
+ EXPECT_ERR(err);
-+ EXPECT(st2->merged->readers_len == 1);
++ EXPECT(st1->merged->readers_len == 1);
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0000"));
9: d98316fbf4c = 9: 5aee91de25e reftable/stack: reorder swapping in the reloaded stack contents
10: b777818ea99 = 10: 4a8d45cc9b4 reftable/stack: fix segfault when reload with reused readers fails
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 01/10] reftable/blocksource: drop malloc block source
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 02/10] reftable/stack: inline `stack_compact_range_stats()` Patrick Steinhardt
` (9 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
The reftable blocksource provides a generic interface to read blocks via
different sources, e.g. from disk or from memory. One of the block
sources is the malloc block source, which can in theory read data from
memory. We nowadays also have a strbuf block source though, which
provides essentially the same functionality with better ergonomics.
Adapt the only remaining user of the malloc block source in our tests
to use the strbuf block source, instead, and remove the now-unused
malloc block source.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block_test.c | 3 ++-
reftable/blocksource.c | 20 --------------------
reftable/blocksource.h | 2 --
3 files changed, 2 insertions(+), 23 deletions(-)
diff --git a/reftable/block_test.c b/reftable/block_test.c
index 90aecd5a7c6..de8f426a429 100644
--- a/reftable/block_test.c
+++ b/reftable/block_test.c
@@ -34,11 +34,12 @@ static void test_block_read_write(void)
struct block_reader br = { 0 };
struct block_iter it = BLOCK_ITER_INIT;
int j = 0;
+ struct strbuf block_data = STRBUF_INIT;
struct strbuf want = STRBUF_INIT;
REFTABLE_CALLOC_ARRAY(block.data, block_size);
block.len = block_size;
- block.source = malloc_block_source();
+ block_source_from_strbuf(&block.source, &block_data);
block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size,
header_off, hash_size(GIT_SHA1_FORMAT_ID));
diff --git a/reftable/blocksource.c b/reftable/blocksource.c
index eeed254ba9c..1774853011d 100644
--- a/reftable/blocksource.c
+++ b/reftable/blocksource.c
@@ -55,26 +55,6 @@ void block_source_from_strbuf(struct reftable_block_source *bs,
bs->arg = buf;
}
-static void malloc_return_block(void *b, struct reftable_block *dest)
-{
- if (dest->len)
- memset(dest->data, 0xff, dest->len);
- reftable_free(dest->data);
-}
-
-static struct reftable_block_source_vtable malloc_vtable = {
- .return_block = &malloc_return_block,
-};
-
-static struct reftable_block_source malloc_block_source_instance = {
- .ops = &malloc_vtable,
-};
-
-struct reftable_block_source malloc_block_source(void)
-{
- return malloc_block_source_instance;
-}
-
struct file_block_source {
uint64_t size;
unsigned char *data;
diff --git a/reftable/blocksource.h b/reftable/blocksource.h
index 072e2727ad2..659a27b4063 100644
--- a/reftable/blocksource.h
+++ b/reftable/blocksource.h
@@ -17,6 +17,4 @@ struct reftable_block_source;
void block_source_from_strbuf(struct reftable_block_source *bs,
struct strbuf *buf);
-struct reftable_block_source malloc_block_source(void);
-
#endif
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 02/10] reftable/stack: inline `stack_compact_range_stats()`
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 01/10] reftable/blocksource: drop malloc block source Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 03/10] reftable/reader: rename `reftable_new_reader()` Patrick Steinhardt
` (8 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
The only difference between `stack_compact_range_stats()` and
`stack_compact_range()` is that the former updates stats on failure,
whereas the latter doesn't. There are no callers anymore that do not
want their stats updated though, making the indirection unnecessary.
Inline the stat updates into `stack_compact_range()`.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/reftable/stack.c b/reftable/stack.c
index d3a95d2f1d7..891ea971b72 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1328,17 +1328,9 @@ static int stack_compact_range(struct reftable_stack *st,
strbuf_release(&table_name);
free_names(names);
- return err;
-}
-
-static int stack_compact_range_stats(struct reftable_stack *st,
- size_t first, size_t last,
- struct reftable_log_expiry_config *config,
- unsigned int flags)
-{
- int err = stack_compact_range(st, first, last, config, flags);
if (err == REFTABLE_LOCK_ERROR)
st->stats.failures++;
+
return err;
}
@@ -1346,7 +1338,7 @@ int reftable_stack_compact_all(struct reftable_stack *st,
struct reftable_log_expiry_config *config)
{
size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0;
- return stack_compact_range_stats(st, 0, last, config, 0);
+ return stack_compact_range(st, 0, last, config, 0);
}
static int segment_size(struct segment *s)
@@ -1452,8 +1444,8 @@ int reftable_stack_auto_compact(struct reftable_stack *st)
st->opts.auto_compaction_factor);
reftable_free(sizes);
if (segment_size(&seg) > 0)
- return stack_compact_range_stats(st, seg.start, seg.end - 1,
- NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
+ return stack_compact_range(st, seg.start, seg.end - 1,
+ NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
return 0;
}
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 03/10] reftable/reader: rename `reftable_new_reader()`
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 01/10] reftable/blocksource: drop malloc block source Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 02/10] reftable/stack: inline `stack_compact_range_stats()` Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 04/10] reftable/reader: inline `init_reader()` Patrick Steinhardt
` (7 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
Rename the `reftable_new_reader()` function to `reftable_reader_new()`
to match our coding guidelines.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 4 ++--
reftable/readwrite_test.c | 6 +++---
reftable/reftable-reader.h | 4 ++--
reftable/stack.c | 4 ++--
t/helper/test-reftable.c | 2 +-
t/unit-tests/t-reftable-merged.c | 6 +++---
6 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index 082cf00b606..ea4fdea90b6 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -637,7 +637,7 @@ void reader_close(struct reftable_reader *r)
FREE_AND_NULL(r->name);
}
-int reftable_new_reader(struct reftable_reader **p,
+int reftable_reader_new(struct reftable_reader **p,
struct reftable_block_source *src, char const *name)
{
struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd));
@@ -786,7 +786,7 @@ int reftable_reader_print_blocks(const char *tablename)
if (err < 0)
goto done;
- err = reftable_new_reader(&r, &src, tablename);
+ err = reftable_reader_new(&r, &src, tablename);
if (err < 0)
goto done;
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index f411abfe9cb..101cdb5cd66 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -646,7 +646,7 @@ static void test_write_empty_table(void)
block_source_from_strbuf(&source, &buf);
- err = reftable_new_reader(&rd, &source, "filename");
+ err = reftable_reader_new(&rd, &source, "filename");
EXPECT_ERR(err);
reftable_reader_init_ref_iterator(rd, &it);
@@ -850,7 +850,7 @@ static void test_write_multiple_indices(void)
EXPECT(stats->log_stats.index_offset > 0);
block_source_from_strbuf(&source, &writer_buf);
- err = reftable_new_reader(&reader, &source, "filename");
+ err = reftable_reader_new(&reader, &source, "filename");
EXPECT_ERR(err);
/*
@@ -907,7 +907,7 @@ static void test_write_multi_level_index(void)
EXPECT(stats->ref_stats.max_index_level == 2);
block_source_from_strbuf(&source, &writer_buf);
- err = reftable_new_reader(&reader, &source, "filename");
+ err = reftable_reader_new(&reader, &source, "filename");
EXPECT_ERR(err);
/*
diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h
index 69621c5b0fc..8a05c846858 100644
--- a/reftable/reftable-reader.h
+++ b/reftable/reftable-reader.h
@@ -23,14 +23,14 @@
/* The reader struct is a handle to an open reftable file. */
struct reftable_reader;
-/* reftable_new_reader opens a reftable for reading. If successful,
+/* reftable_reader_new opens a reftable for reading. If successful,
* returns 0 code and sets pp. The name is used for creating a
* stack. Typically, it is the basename of the file. The block source
* `src` is owned by the reader, and is closed on calling
* reftable_reader_destroy(). On error, the block source `src` is
* closed as well.
*/
-int reftable_new_reader(struct reftable_reader **pp,
+int reftable_reader_new(struct reftable_reader **pp,
struct reftable_block_source *src, const char *name);
/* Initialize a reftable iterator for reading refs. */
diff --git a/reftable/stack.c b/reftable/stack.c
index 891ea971b72..c72435b0596 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -258,7 +258,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
if (err < 0)
goto done;
- err = reftable_new_reader(&rd, &src, name);
+ err = reftable_reader_new(&rd, &src, name);
if (err < 0)
goto done;
}
@@ -1532,7 +1532,7 @@ static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
if (err < 0)
goto done;
- err = reftable_new_reader(&rd, &src, name);
+ err = reftable_reader_new(&rd, &src, name);
if (err < 0)
goto done;
diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c
index c1942156b50..87c2f42aaed 100644
--- a/t/helper/test-reftable.c
+++ b/t/helper/test-reftable.c
@@ -139,7 +139,7 @@ static int dump_reftable(const char *tablename)
if (err < 0)
goto done;
- err = reftable_new_reader(&r, &src, tablename);
+ err = reftable_reader_new(&r, &src, tablename);
if (err < 0)
goto done;
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 93345c6c8be..8f51aca1b62 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -102,7 +102,7 @@ merged_table_from_records(struct reftable_ref_record **refs,
write_test_table(&buf[i], refs[i], sizes[i]);
block_source_from_strbuf(&(*source)[i], &buf[i]);
- err = reftable_new_reader(&(*readers)[i], &(*source)[i],
+ err = reftable_reader_new(&(*readers)[i], &(*source)[i],
"name");
check(!err);
}
@@ -277,7 +277,7 @@ merged_table_from_log_records(struct reftable_log_record **logs,
write_test_log_table(&buf[i], logs[i], sizes[i], i + 1);
block_source_from_strbuf(&(*source)[i], &buf[i]);
- err = reftable_new_reader(&(*readers)[i], &(*source)[i],
+ err = reftable_reader_new(&(*readers)[i], &(*source)[i],
"name");
check(!err);
}
@@ -426,7 +426,7 @@ static void t_default_write_opts(void)
block_source_from_strbuf(&source, &buf);
- err = reftable_new_reader(&rd, &source, "filename");
+ err = reftable_reader_new(&rd, &source, "filename");
check(!err);
hash_id = reftable_reader_hash_id(rd);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 04/10] reftable/reader: inline `init_reader()`
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (2 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 03/10] reftable/reader: rename `reftable_new_reader()` Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 05/10] reftable/reader: inline `reader_close()` Patrick Steinhardt
` (6 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
Most users use an allocated version of the `reftable_reader`, except for
some tests. We are about to convert the reader to become refcounted
though, and providing the ability to keep a reader on the stack makes
this conversion harder than necessary.
Update the tests to use `reftable_reader_new()` instead to prepare for
this change.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 122 +++++++++++++++++++-------------------
reftable/reader.h | 2 -
reftable/readwrite_test.c | 73 ++++++++++++-----------
3 files changed, 98 insertions(+), 99 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index ea4fdea90b6..9239679ad95 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -162,58 +162,6 @@ static int parse_footer(struct reftable_reader *r, uint8_t *footer,
return err;
}
-int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name)
-{
- struct reftable_block footer = { NULL };
- struct reftable_block header = { NULL };
- int err = 0;
- uint64_t file_size = block_source_size(source);
-
- /* Need +1 to read type of first block. */
- uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
- memset(r, 0, sizeof(struct reftable_reader));
-
- if (read_size > file_size) {
- err = REFTABLE_FORMAT_ERROR;
- goto done;
- }
-
- err = block_source_read_block(source, &header, 0, read_size);
- if (err != read_size) {
- err = REFTABLE_IO_ERROR;
- goto done;
- }
-
- if (memcmp(header.data, "REFT", 4)) {
- err = REFTABLE_FORMAT_ERROR;
- goto done;
- }
- r->version = header.data[4];
- if (r->version != 1 && r->version != 2) {
- err = REFTABLE_FORMAT_ERROR;
- goto done;
- }
-
- r->size = file_size - footer_size(r->version);
- r->source = *source;
- r->name = xstrdup(name);
- r->hash_id = 0;
-
- err = block_source_read_block(source, &footer, r->size,
- footer_size(r->version));
- if (err != footer_size(r->version)) {
- err = REFTABLE_IO_ERROR;
- goto done;
- }
-
- err = parse_footer(r, footer.data, header.data);
-done:
- reftable_block_done(&footer);
- reftable_block_done(&header);
- return err;
-}
-
struct table_iter {
struct reftable_reader *r;
uint8_t typ;
@@ -637,16 +585,68 @@ void reader_close(struct reftable_reader *r)
FREE_AND_NULL(r->name);
}
-int reftable_reader_new(struct reftable_reader **p,
- struct reftable_block_source *src, char const *name)
+int reftable_reader_new(struct reftable_reader **out,
+ struct reftable_block_source *source, char const *name)
{
- struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd));
- int err = init_reader(rd, src, name);
- if (err == 0) {
- *p = rd;
- } else {
- block_source_close(src);
- reftable_free(rd);
+ struct reftable_block footer = { 0 };
+ struct reftable_block header = { 0 };
+ struct reftable_reader *r;
+ uint64_t file_size = block_source_size(source);
+ uint32_t read_size;
+ int err;
+
+ REFTABLE_CALLOC_ARRAY(r, 1);
+
+ /*
+ * We need one extra byte to read the type of first block. We also
+ * pretend to always be reading v2 of the format because it is larger.
+ */
+ read_size = header_size(2) + 1;
+ if (read_size > file_size) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ err = block_source_read_block(source, &header, 0, read_size);
+ if (err != read_size) {
+ err = REFTABLE_IO_ERROR;
+ goto done;
+ }
+
+ if (memcmp(header.data, "REFT", 4)) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+ r->version = header.data[4];
+ if (r->version != 1 && r->version != 2) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ r->size = file_size - footer_size(r->version);
+ r->source = *source;
+ r->name = xstrdup(name);
+ r->hash_id = 0;
+
+ err = block_source_read_block(source, &footer, r->size,
+ footer_size(r->version));
+ if (err != footer_size(r->version)) {
+ err = REFTABLE_IO_ERROR;
+ goto done;
+ }
+
+ err = parse_footer(r, footer.data, header.data);
+ if (err)
+ goto done;
+
+ *out = r;
+
+done:
+ reftable_block_done(&footer);
+ reftable_block_done(&header);
+ if (err) {
+ reftable_free(r);
+ block_source_close(source);
}
return err;
}
diff --git a/reftable/reader.h b/reftable/reader.h
index a2c204d523c..762cd6de667 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -52,8 +52,6 @@ struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
-int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
- const char *name);
void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 101cdb5cd66..2f2ff787b26 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -195,7 +195,7 @@ static void test_log_write_read(void)
struct reftable_log_record log = { NULL };
int n;
struct reftable_iterator it = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
struct strbuf buf = STRBUF_INIT;
struct reftable_writer *w =
@@ -236,10 +236,10 @@ static void test_log_write_read(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT_ERR(err);
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, names[N - 1]);
EXPECT_ERR(err);
@@ -254,7 +254,7 @@ static void test_log_write_read(void)
reftable_iterator_destroy(&it);
reftable_ref_record_release(&ref);
- reftable_reader_init_log_iterator(&rd, &it);
+ reftable_reader_init_log_iterator(reader, &it);
err = reftable_iterator_seek_log(&it, "");
EXPECT_ERR(err);
@@ -279,7 +279,7 @@ static void test_log_write_read(void)
/* cleanup. */
strbuf_release(&buf);
free_names(names);
- reader_close(&rd);
+ reftable_reader_free(reader);
}
static void test_log_zlib_corruption(void)
@@ -288,7 +288,7 @@ static void test_log_zlib_corruption(void)
.block_size = 256,
};
struct reftable_iterator it = { 0 };
- struct reftable_reader rd = { 0 };
+ struct reftable_reader *reader;
struct reftable_block_source source = { 0 };
struct strbuf buf = STRBUF_INIT;
struct reftable_writer *w =
@@ -331,18 +331,18 @@ static void test_log_zlib_corruption(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT_ERR(err);
- reftable_reader_init_log_iterator(&rd, &it);
+ reftable_reader_init_log_iterator(reader, &it);
err = reftable_iterator_seek_log(&it, "refname");
EXPECT(err == REFTABLE_ZLIB_ERROR);
reftable_iterator_destroy(&it);
/* cleanup. */
+ reftable_reader_free(reader);
strbuf_release(&buf);
- reader_close(&rd);
}
static void test_table_read_write_sequential(void)
@@ -352,7 +352,7 @@ static void test_table_read_write_sequential(void)
int N = 50;
struct reftable_iterator it = { NULL };
struct reftable_block_source source = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
int err = 0;
int j = 0;
@@ -360,10 +360,10 @@ static void test_table_read_write_sequential(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, "");
EXPECT_ERR(err);
@@ -381,11 +381,11 @@ static void test_table_read_write_sequential(void)
reftable_ref_record_release(&ref);
}
EXPECT(j == N);
+
reftable_iterator_destroy(&it);
+ reftable_reader_free(reader);
strbuf_release(&buf);
free_names(names);
-
- reader_close(&rd);
}
static void test_table_write_small_table(void)
@@ -404,7 +404,7 @@ static void test_table_read_api(void)
char **names;
struct strbuf buf = STRBUF_INIT;
int N = 50;
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
int err;
int i;
@@ -415,10 +415,10 @@ static void test_table_read_api(void)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, names[0]);
EXPECT_ERR(err);
@@ -431,7 +431,7 @@ static void test_table_read_api(void)
}
reftable_iterator_destroy(&it);
reftable_free(names);
- reader_close(&rd);
+ reftable_reader_free(reader);
strbuf_release(&buf);
}
@@ -440,7 +440,7 @@ static void test_table_read_write_seek(int index, int hash_id)
char **names;
struct strbuf buf = STRBUF_INIT;
int N = 50;
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
int err;
int i = 0;
@@ -453,18 +453,18 @@ static void test_table_read_write_seek(int index, int hash_id)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
- EXPECT(hash_id == reftable_reader_hash_id(&rd));
+ EXPECT(hash_id == reftable_reader_hash_id(reader));
if (!index) {
- rd.ref_offsets.index_offset = 0;
+ reader->ref_offsets.index_offset = 0;
} else {
- EXPECT(rd.ref_offsets.index_offset > 0);
+ EXPECT(reader->ref_offsets.index_offset > 0);
}
for (i = 1; i < N; i++) {
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, names[i]);
EXPECT_ERR(err);
err = reftable_iterator_next_ref(&it, &ref);
@@ -480,7 +480,7 @@ static void test_table_read_write_seek(int index, int hash_id)
strbuf_addstr(&pastLast, names[N - 1]);
strbuf_addstr(&pastLast, "/");
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, pastLast.buf);
if (err == 0) {
struct reftable_ref_record ref = { NULL };
@@ -498,7 +498,7 @@ static void test_table_read_write_seek(int index, int hash_id)
reftable_free(names[i]);
}
reftable_free(names);
- reader_close(&rd);
+ reftable_reader_free(reader);
}
static void test_table_read_write_seek_linear(void)
@@ -530,7 +530,7 @@ static void test_table_refs_for(int indexed)
int i = 0;
int n;
int err;
- struct reftable_reader rd;
+ struct reftable_reader *reader;
struct reftable_block_source source = { NULL };
struct strbuf buf = STRBUF_INIT;
@@ -579,18 +579,18 @@ static void test_table_refs_for(int indexed)
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.ref");
+ err = reftable_reader_new(&reader, &source, "file.ref");
EXPECT_ERR(err);
if (!indexed) {
- rd.obj_offsets.is_present = 0;
+ reader->obj_offsets.is_present = 0;
}
- reftable_reader_init_ref_iterator(&rd, &it);
+ reftable_reader_init_ref_iterator(reader, &it);
err = reftable_iterator_seek_ref(&it, "");
EXPECT_ERR(err);
reftable_iterator_destroy(&it);
- err = reftable_reader_refs_for(&rd, &it, want_hash);
+ err = reftable_reader_refs_for(reader, &it, want_hash);
EXPECT_ERR(err);
j = 0;
@@ -611,7 +611,7 @@ static void test_table_refs_for(int indexed)
strbuf_release(&buf);
free_names(want_names);
reftable_iterator_destroy(&it);
- reader_close(&rd);
+ reftable_reader_free(reader);
}
static void test_table_refs_for_no_index(void)
@@ -928,11 +928,11 @@ static void test_corrupt_table_empty(void)
{
struct strbuf buf = STRBUF_INIT;
struct reftable_block_source source = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
int err;
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT(err == REFTABLE_FORMAT_ERROR);
}
@@ -941,13 +941,14 @@ static void test_corrupt_table(void)
uint8_t zeros[1024] = { 0 };
struct strbuf buf = STRBUF_INIT;
struct reftable_block_source source = { NULL };
- struct reftable_reader rd = { NULL };
+ struct reftable_reader *reader;
int err;
strbuf_add(&buf, zeros, sizeof(zeros));
block_source_from_strbuf(&source, &buf);
- err = init_reader(&rd, &source, "file.log");
+ err = reftable_reader_new(&reader, &source, "file.log");
EXPECT(err == REFTABLE_FORMAT_ERROR);
+
strbuf_release(&buf);
}
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 05/10] reftable/reader: inline `reader_close()`
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (3 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 04/10] reftable/reader: inline `init_reader()` Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 06/10] reftable/stack: fix broken refnames in `write_n_ref_tables()` Patrick Steinhardt
` (5 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
Same as with the preceding commit, we also provide a `reader_close()`
function that allows the caller to close a reader without freeing it.
This is unnecessary now that all users will have an allocated version of
the reader.
Inline it into `reftable_reader_free()`.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 9 ++-------
reftable/reader.h | 1 -
reftable/stack.c | 5 +----
3 files changed, 3 insertions(+), 12 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index 9239679ad95..037417fcf63 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -579,12 +579,6 @@ void reftable_reader_init_log_iterator(struct reftable_reader *r,
reader_init_iter(r, it, BLOCK_TYPE_LOG);
}
-void reader_close(struct reftable_reader *r)
-{
- block_source_close(&r->source);
- FREE_AND_NULL(r->name);
-}
-
int reftable_reader_new(struct reftable_reader **out,
struct reftable_block_source *source, char const *name)
{
@@ -655,7 +649,8 @@ void reftable_reader_free(struct reftable_reader *r)
{
if (!r)
return;
- reader_close(r);
+ block_source_close(&r->source);
+ FREE_AND_NULL(r->name);
reftable_free(r);
}
diff --git a/reftable/reader.h b/reftable/reader.h
index 762cd6de667..88b4f3b4212 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -52,7 +52,6 @@ struct reftable_reader {
struct reftable_reader_offsets log_offsets;
};
-void reader_close(struct reftable_reader *r);
const char *reader_name(struct reftable_reader *r);
void reader_init_iter(struct reftable_reader *r,
diff --git a/reftable/stack.c b/reftable/stack.c
index c72435b0596..0ac9cdf8de1 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -290,7 +290,6 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
const char *name = reader_name(cur[i]);
stack_filename(&table_path, st, name);
- reader_close(cur[i]);
reftable_reader_free(cur[i]);
/* On Windows, can only unlink after closing. */
@@ -299,10 +298,8 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
}
done:
- for (i = 0; i < new_readers_len; i++) {
- reader_close(new_readers[i]);
+ for (i = 0; i < new_readers_len; i++)
reftable_reader_free(new_readers[i]);
- }
reftable_free(new_readers);
reftable_free(cur);
strbuf_release(&table_path);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 06/10] reftable/stack: fix broken refnames in `write_n_ref_tables()`
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (4 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 05/10] reftable/reader: inline `reader_close()` Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 07/10] reftable/reader: introduce refcounting Patrick Steinhardt
` (4 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
The `write_n_ref_tables()` helper function writes N references in
separate tables. We never reset the computed name of those references
though, leading us to end up with unexpected names.
Fix this by resetting the buffer.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack_test.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 42044ed8a3e..de0669b7b8a 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -125,6 +125,7 @@ static void write_n_ref_tables(struct reftable_stack *st,
.value_type = REFTABLE_REF_VAL1,
};
+ strbuf_reset(&buf);
strbuf_addf(&buf, "refs/heads/branch-%04u", (unsigned) i);
ref.refname = buf.buf;
set_test_hash(ref.value.val1, i);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 07/10] reftable/reader: introduce refcounting
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (5 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 06/10] reftable/stack: fix broken refnames in `write_n_ref_tables()` Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 08/10] reftable/reader: keep readers alive during iteration Patrick Steinhardt
` (3 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
It was recently reported that concurrent reads and writes may cause the
reftable backend to segfault. The root cause of this is that we do not
properly keep track of reftable readers across reloads.
Suppose that you have a reftable iterator and then decide to reload the
stack while iterating through the iterator. When the stack has been
rewritten since we have created the iterator, then we would end up
discarding a subset of readers that may still be in use by the iterator.
The consequence is that we now try to reference deallocated memory,
which of course segfaults.
One way to trigger this is in t5616, where some background maintenance
jobs have been leaking from one test into another. This leads to stack
traces like the following one:
+ git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 --refetch origin
AddressSanitizer:DEADLYSIGNAL
=================================================================
==657994==ERROR: AddressSanitizer: SEGV on unknown address 0x7fa0f0ec6089 (pc 0x55f23e52ddf9 bp
0x7ffe7bfa1700 sp 0x7ffe7bfa1700 T0)
==657994==The signal is caused by a READ memory access.
#0 0x55f23e52ddf9 in get_var_int reftable/record.c:29
#1 0x55f23e53295e in reftable_decode_keylen reftable/record.c:170
#2 0x55f23e532cc0 in reftable_decode_key reftable/record.c:194
#3 0x55f23e54e72e in block_iter_next reftable/block.c:398
#4 0x55f23e5573dc in table_iter_next_in_block reftable/reader.c:240
#5 0x55f23e5573dc in table_iter_next reftable/reader.c:355
#6 0x55f23e5573dc in table_iter_next reftable/reader.c:339
#7 0x55f23e551283 in merged_iter_advance_subiter reftable/merged.c:69
#8 0x55f23e55169e in merged_iter_next_entry reftable/merged.c:123
#9 0x55f23e55169e in merged_iter_next_void reftable/merged.c:172
#10 0x55f23e537625 in reftable_iterator_next_ref reftable/generic.c:175
#11 0x55f23e2cf9c6 in reftable_ref_iterator_advance refs/reftable-backend.c:464
#12 0x55f23e2d996e in ref_iterator_advance refs/iterator.c:13
#13 0x55f23e2d996e in do_for_each_ref_iterator refs/iterator.c:452
#14 0x55f23dca6767 in get_ref_map builtin/fetch.c:623
#15 0x55f23dca6767 in do_fetch builtin/fetch.c:1659
#16 0x55f23dca6767 in fetch_one builtin/fetch.c:2133
#17 0x55f23dca6767 in cmd_fetch builtin/fetch.c:2432
#18 0x55f23dba7764 in run_builtin git.c:484
#19 0x55f23dba7764 in handle_builtin git.c:741
#20 0x55f23dbab61e in run_argv git.c:805
#21 0x55f23dbab61e in cmd_main git.c:1000
#22 0x55f23dba4781 in main common-main.c:64
#23 0x7fa0f063fc89 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
#24 0x7fa0f063fd44 in __libc_start_main_impl ../csu/libc-start.c:360
#25 0x55f23dba6ad0 in _start (git+0xadfad0) (BuildId: 803b2b7f59beb03d7849fb8294a8e2145dd4aa27)
While it is somewhat awkward that the maintenance processes survive
tests in the first place, it is totally expected that reftables should
work alright with concurrent writers. Seemingly they don't.
The only underlying resource that we need to care about in this context
is the reftable reader, which is responsible for reading a single table
from disk. These readers get discarded immediately (unless reused) when
calling `reftable_stack_reload()`, which is wrong. We can only close
them once we know that there are no iterators using them anymore.
Prepare for a fix by converting the reftable readers to be refcounted.
Reported-by: Jeff King <peff@peff.net>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 16 ++++++++++++++--
reftable/reader.h | 2 ++
reftable/readwrite_test.c | 18 +++++++++---------
reftable/reftable-reader.h | 15 ++++++++++++---
reftable/stack.c | 8 ++++----
reftable/stack_test.c | 6 ++----
t/helper/test-reftable.c | 2 +-
t/unit-tests/t-reftable-merged.c | 4 ++--
8 files changed, 46 insertions(+), 25 deletions(-)
diff --git a/reftable/reader.c b/reftable/reader.c
index 037417fcf63..64a0953e687 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -621,6 +621,7 @@ int reftable_reader_new(struct reftable_reader **out,
r->source = *source;
r->name = xstrdup(name);
r->hash_id = 0;
+ r->refcount = 1;
err = block_source_read_block(source, &footer, r->size,
footer_size(r->version));
@@ -645,10 +646,21 @@ int reftable_reader_new(struct reftable_reader **out,
return err;
}
-void reftable_reader_free(struct reftable_reader *r)
+void reftable_reader_incref(struct reftable_reader *r)
+{
+ if (!r->refcount)
+ BUG("cannot increment ref counter of dead reader");
+ r->refcount++;
+}
+
+void reftable_reader_decref(struct reftable_reader *r)
{
if (!r)
return;
+ if (!r->refcount)
+ BUG("cannot decrement ref counter of dead reader");
+ if (--r->refcount)
+ return;
block_source_close(&r->source);
FREE_AND_NULL(r->name);
reftable_free(r);
@@ -812,7 +824,7 @@ int reftable_reader_print_blocks(const char *tablename)
}
done:
- reftable_reader_free(r);
+ reftable_reader_decref(r);
table_iter_close(&ti);
return err;
}
diff --git a/reftable/reader.h b/reftable/reader.h
index 88b4f3b4212..3710ee09b4c 100644
--- a/reftable/reader.h
+++ b/reftable/reader.h
@@ -50,6 +50,8 @@ struct reftable_reader {
struct reftable_reader_offsets ref_offsets;
struct reftable_reader_offsets obj_offsets;
struct reftable_reader_offsets log_offsets;
+
+ uint64_t refcount;
};
const char *reader_name(struct reftable_reader *r);
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 2f2ff787b26..0494e7955a5 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -279,7 +279,7 @@ static void test_log_write_read(void)
/* cleanup. */
strbuf_release(&buf);
free_names(names);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
}
static void test_log_zlib_corruption(void)
@@ -341,7 +341,7 @@ static void test_log_zlib_corruption(void)
reftable_iterator_destroy(&it);
/* cleanup. */
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&buf);
}
@@ -383,7 +383,7 @@ static void test_table_read_write_sequential(void)
EXPECT(j == N);
reftable_iterator_destroy(&it);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&buf);
free_names(names);
}
@@ -431,7 +431,7 @@ static void test_table_read_api(void)
}
reftable_iterator_destroy(&it);
reftable_free(names);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&buf);
}
@@ -498,7 +498,7 @@ static void test_table_read_write_seek(int index, int hash_id)
reftable_free(names[i]);
}
reftable_free(names);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
}
static void test_table_read_write_seek_linear(void)
@@ -611,7 +611,7 @@ static void test_table_refs_for(int indexed)
strbuf_release(&buf);
free_names(want_names);
reftable_iterator_destroy(&it);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
}
static void test_table_refs_for_no_index(void)
@@ -657,7 +657,7 @@ static void test_write_empty_table(void)
EXPECT(err > 0);
reftable_iterator_destroy(&it);
- reftable_reader_free(rd);
+ reftable_reader_decref(rd);
strbuf_release(&buf);
}
@@ -863,7 +863,7 @@ static void test_write_multiple_indices(void)
reftable_iterator_destroy(&it);
reftable_writer_free(writer);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&writer_buf);
strbuf_release(&buf);
}
@@ -919,7 +919,7 @@ static void test_write_multi_level_index(void)
reftable_iterator_destroy(&it);
reftable_writer_free(writer);
- reftable_reader_free(reader);
+ reftable_reader_decref(reader);
strbuf_release(&writer_buf);
strbuf_release(&buf);
}
diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h
index 8a05c846858..a600452b565 100644
--- a/reftable/reftable-reader.h
+++ b/reftable/reftable-reader.h
@@ -33,6 +33,18 @@ struct reftable_reader;
int reftable_reader_new(struct reftable_reader **pp,
struct reftable_block_source *src, const char *name);
+/*
+ * Manage the reference count of the reftable reader. A newly initialized
+ * reader starts with a refcount of 1 and will be deleted once the refcount has
+ * reached 0.
+ *
+ * This is required because readers may have longer lifetimes than the stack
+ * they belong to. The stack may for example be reloaded while the old tables
+ * are still being accessed by an iterator.
+ */
+void reftable_reader_incref(struct reftable_reader *reader);
+void reftable_reader_decref(struct reftable_reader *reader);
+
/* Initialize a reftable iterator for reading refs. */
void reftable_reader_init_ref_iterator(struct reftable_reader *r,
struct reftable_iterator *it);
@@ -44,9 +56,6 @@ void reftable_reader_init_log_iterator(struct reftable_reader *r,
/* returns the hash ID used in this table. */
uint32_t reftable_reader_hash_id(struct reftable_reader *r);
-/* closes and deallocates a reader. */
-void reftable_reader_free(struct reftable_reader *);
-
/* return an iterator for the refs pointing to `oid`. */
int reftable_reader_refs_for(struct reftable_reader *r,
struct reftable_iterator *it, uint8_t *oid);
diff --git a/reftable/stack.c b/reftable/stack.c
index 0ac9cdf8de1..8e85f8b4d99 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -186,7 +186,7 @@ void reftable_stack_destroy(struct reftable_stack *st)
if (names && !has_name(names, name)) {
stack_filename(&filename, st, name);
}
- reftable_reader_free(st->readers[i]);
+ reftable_reader_decref(st->readers[i]);
if (filename.len) {
/* On Windows, can only unlink after closing. */
@@ -290,7 +290,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
const char *name = reader_name(cur[i]);
stack_filename(&table_path, st, name);
- reftable_reader_free(cur[i]);
+ reftable_reader_decref(cur[i]);
/* On Windows, can only unlink after closing. */
unlink(table_path.buf);
@@ -299,7 +299,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
done:
for (i = 0; i < new_readers_len; i++)
- reftable_reader_free(new_readers[i]);
+ reftable_reader_decref(new_readers[i]);
reftable_free(new_readers);
reftable_free(cur);
strbuf_release(&table_path);
@@ -1534,7 +1534,7 @@ static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
goto done;
update_idx = reftable_reader_max_update_index(rd);
- reftable_reader_free(rd);
+ reftable_reader_decref(rd);
if (update_idx <= max) {
unlink(table_path.buf);
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index de0669b7b8a..bc3bf772749 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -1036,10 +1036,8 @@ static void test_reftable_stack_compaction_concurrent(void)
static void unclean_stack_close(struct reftable_stack *st)
{
/* break abstraction boundary to simulate unclean shutdown. */
- int i = 0;
- for (; i < st->readers_len; i++) {
- reftable_reader_free(st->readers[i]);
- }
+ for (size_t i = 0; i < st->readers_len; i++)
+ reftable_reader_decref(st->readers[i]);
st->readers_len = 0;
FREE_AND_NULL(st->readers);
}
diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c
index 87c2f42aaed..f6d855a9d7f 100644
--- a/t/helper/test-reftable.c
+++ b/t/helper/test-reftable.c
@@ -152,7 +152,7 @@ static int dump_reftable(const char *tablename)
done:
reftable_merged_table_free(mt);
- reftable_reader_free(r);
+ reftable_reader_decref(r);
return err;
}
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 8f51aca1b62..081d3c8b697 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -115,7 +115,7 @@ merged_table_from_records(struct reftable_ref_record **refs,
static void readers_destroy(struct reftable_reader **readers, const size_t n)
{
for (size_t i = 0; i < n; i++)
- reftable_reader_free(readers[i]);
+ reftable_reader_decref(readers[i]);
reftable_free(readers);
}
@@ -437,7 +437,7 @@ static void t_default_write_opts(void)
err = reftable_merged_table_new(&merged, &rd, 1, GIT_SHA1_FORMAT_ID);
check(!err);
- reftable_reader_free(rd);
+ reftable_reader_decref(rd);
reftable_merged_table_free(merged);
strbuf_release(&buf);
}
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 08/10] reftable/reader: keep readers alive during iteration
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (6 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 07/10] reftable/reader: introduce refcounting Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 09/10] reftable/stack: reorder swapping in the reloaded stack contents Patrick Steinhardt
` (2 subsequent siblings)
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
The lifetime of a table iterator may survive the lifetime of a reader
when the stack gets reloaded. Keep the reader from being released by
increasing its refcount while the iterator is still being used.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/reader.c | 2 ++
reftable/stack_test.c | 50 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+)
diff --git a/reftable/reader.c b/reftable/reader.c
index 64a0953e687..f8770990876 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -175,6 +175,7 @@ static int table_iter_init(struct table_iter *ti, struct reftable_reader *r)
{
struct block_iter bi = BLOCK_ITER_INIT;
memset(ti, 0, sizeof(*ti));
+ reftable_reader_incref(r);
ti->r = r;
ti->bi = bi;
return 0;
@@ -262,6 +263,7 @@ static void table_iter_close(struct table_iter *ti)
{
table_iter_block_done(ti);
block_iter_close(&ti->bi);
+ reftable_reader_decref(ti->r);
}
static int table_iter_next_block(struct table_iter *ti)
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index bc3bf772749..7fb5beb7c94 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -1076,6 +1076,55 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
clear_dir(dir);
}
+static void test_reftable_stack_read_across_reload(void)
+{
+ struct reftable_write_options opts = { 0 };
+ struct reftable_stack *st1 = NULL, *st2 = NULL;
+ struct reftable_ref_record rec = { 0 };
+ struct reftable_iterator it = { 0 };
+ char *dir = get_tmp_dir(__LINE__);
+ int err;
+
+ /* Create a first stack and set up an iterator for it. */
+ err = reftable_new_stack(&st1, dir, &opts);
+ EXPECT_ERR(err);
+ write_n_ref_tables(st1, 2);
+ EXPECT(st1->merged->readers_len == 2);
+ reftable_stack_init_ref_iterator(st1, &it);
+ err = reftable_iterator_seek_ref(&it, "");
+ EXPECT_ERR(err);
+
+ /* Set up a second stack for the same directory and compact it. */
+ err = reftable_new_stack(&st2, dir, &opts);
+ EXPECT_ERR(err);
+ EXPECT(st2->merged->readers_len == 2);
+ err = reftable_stack_compact_all(st2, NULL);
+ EXPECT_ERR(err);
+ EXPECT(st2->merged->readers_len == 1);
+
+ /*
+ * Verify that we can continue to use the old iterator even after we
+ * have reloaded its stack.
+ */
+ err = reftable_stack_reload(st1);
+ EXPECT_ERR(err);
+ EXPECT(st1->merged->readers_len == 1);
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0000"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0001"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT(err > 0);
+
+ reftable_ref_record_release(&rec);
+ reftable_iterator_destroy(&it);
+ reftable_stack_destroy(st1);
+ reftable_stack_destroy(st2);
+ clear_dir(dir);
+}
+
int stack_test_main(int argc, const char *argv[])
{
RUN_TEST(test_empty_add);
@@ -1098,6 +1147,7 @@ int stack_test_main(int argc, const char *argv[])
RUN_TEST(test_reftable_stack_auto_compaction_fails_gracefully);
RUN_TEST(test_reftable_stack_update_index_check);
RUN_TEST(test_reftable_stack_uptodate);
+ RUN_TEST(test_reftable_stack_read_across_reload);
RUN_TEST(test_suggest_compaction_segment);
RUN_TEST(test_suggest_compaction_segment_nothing);
return 0;
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 09/10] reftable/stack: reorder swapping in the reloaded stack contents
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (7 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 08/10] reftable/reader: keep readers alive during iteration Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 14:12 ` [PATCH v2 10/10] reftable/stack: fix segfault when reload with reused readers fails Patrick Steinhardt
2024-08-23 16:49 ` [PATCH v2 00/10] reftable: fix reload with active iterators Junio C Hamano
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
The code flow of how we swap in the reloaded stack contents is somewhat
convoluted because we switch back and forth between swapping in
different parts of the stack.
Reorder the code to simplify it. We now first close and unlink the old
tables which do not get reused before we update the stack to point to
the new stack.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack.c | 34 +++++++++++++++++++---------------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/reftable/stack.c b/reftable/stack.c
index 8e85f8b4d99..02472222589 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -273,30 +273,34 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
if (err < 0)
goto done;
- st->readers_len = new_readers_len;
- if (st->merged)
- reftable_merged_table_free(st->merged);
- if (st->readers) {
- reftable_free(st->readers);
- }
- st->readers = new_readers;
- new_readers = NULL;
- new_readers_len = 0;
-
- new_merged->suppress_deletions = 1;
- st->merged = new_merged;
+ /*
+ * Close the old, non-reused readers and proactively try to unlink
+ * them. This is done for systems like Windows, where the underlying
+ * file of such an open reader wouldn't have been possible to be
+ * unlinked by the compacting process.
+ */
for (i = 0; i < cur_len; i++) {
if (cur[i]) {
const char *name = reader_name(cur[i]);
stack_filename(&table_path, st, name);
-
reftable_reader_decref(cur[i]);
-
- /* On Windows, can only unlink after closing. */
unlink(table_path.buf);
}
}
+ /* Update the stack to point to the new tables. */
+ if (st->merged)
+ reftable_merged_table_free(st->merged);
+ new_merged->suppress_deletions = 1;
+ st->merged = new_merged;
+
+ if (st->readers)
+ reftable_free(st->readers);
+ st->readers = new_readers;
+ st->readers_len = new_readers_len;
+ new_readers = NULL;
+ new_readers_len = 0;
+
done:
for (i = 0; i < new_readers_len; i++)
reftable_reader_decref(new_readers[i]);
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 10/10] reftable/stack: fix segfault when reload with reused readers fails
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (8 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 09/10] reftable/stack: reorder swapping in the reloaded stack contents Patrick Steinhardt
@ 2024-08-23 14:12 ` Patrick Steinhardt
2024-08-23 16:49 ` [PATCH v2 00/10] reftable: fix reload with active iterators Junio C Hamano
10 siblings, 0 replies; 38+ messages in thread
From: Patrick Steinhardt @ 2024-08-23 14:12 UTC (permalink / raw)
To: git; +Cc: Jeff King, karthik nayak, Junio C Hamano
It is expected that reloading the stack fails with concurrent writers,
e.g. because a table that we just wanted to read just got compacted.
In case we decided to reuse readers this will cause a segfault though
because we unconditionally release all new readers, including the reused
ones. As those are still referenced by the current stack, the result is
that we will eventually try to dereference those already-freed readers.
Fix this bug by incrementing the refcount of reused readers temporarily.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/stack.c | 23 +++++++++++++++++
reftable/stack_test.c | 59 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+)
diff --git a/reftable/stack.c b/reftable/stack.c
index 02472222589..ce0a35216ba 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -226,6 +226,8 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
{
size_t cur_len = !st->merged ? 0 : st->merged->readers_len;
struct reftable_reader **cur = stack_copy_readers(st, cur_len);
+ struct reftable_reader **reused = NULL;
+ size_t reused_len = 0, reused_alloc = 0;
size_t names_len = names_length(names);
struct reftable_reader **new_readers =
reftable_calloc(names_len, sizeof(*new_readers));
@@ -245,6 +247,18 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
rd = cur[i];
cur[i] = NULL;
+
+ /*
+ * When reloading the stack fails, we end up
+ * releasing all new readers. This also
+ * includes the reused readers, even though
+ * they are still in used by the old stack. We
+ * thus need to keep them alive here, which we
+ * do by bumping their refcount.
+ */
+ REFTABLE_ALLOC_GROW(reused, reused_len + 1, reused_alloc);
+ reused[reused_len++] = rd;
+ reftable_reader_incref(rd);
break;
}
}
@@ -301,10 +315,19 @@ static int reftable_stack_reload_once(struct reftable_stack *st,
new_readers = NULL;
new_readers_len = 0;
+ /*
+ * Decrement the refcount of reused readers again. This only needs to
+ * happen on the successful case, because on the unsuccessful one we
+ * decrement their refcount via `new_readers`.
+ */
+ for (i = 0; i < reused_len; i++)
+ reftable_reader_decref(reused[i]);
+
done:
for (i = 0; i < new_readers_len; i++)
reftable_reader_decref(new_readers[i]);
reftable_free(new_readers);
+ reftable_free(reused);
reftable_free(cur);
strbuf_release(&table_path);
return err;
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 7fb5beb7c94..6809bf9d300 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -10,6 +10,7 @@ license that can be found in the LICENSE file or at
#include "system.h"
+#include "copy.h"
#include "reftable-reader.h"
#include "merged.h"
#include "basics.h"
@@ -1125,6 +1126,63 @@ static void test_reftable_stack_read_across_reload(void)
clear_dir(dir);
}
+static void test_reftable_stack_reload_with_missing_table(void)
+{
+ struct reftable_write_options opts = { 0 };
+ struct reftable_stack *st = NULL;
+ struct reftable_ref_record rec = { 0 };
+ struct reftable_iterator it = { 0 };
+ struct strbuf table_path = STRBUF_INIT, content = STRBUF_INIT;
+ char *dir = get_tmp_dir(__LINE__);
+ int err;
+
+ /* Create a first stack and set up an iterator for it. */
+ err = reftable_new_stack(&st, dir, &opts);
+ EXPECT_ERR(err);
+ write_n_ref_tables(st, 2);
+ EXPECT(st->merged->readers_len == 2);
+ reftable_stack_init_ref_iterator(st, &it);
+ err = reftable_iterator_seek_ref(&it, "");
+ EXPECT_ERR(err);
+
+ /*
+ * Update the tables.list file with some garbage data, while reusing
+ * our old readers. This should trigger a partial reload of the stack,
+ * where we try to reuse our old readers.
+ */
+ strbuf_addf(&content, "%s\n", st->readers[0]->name);
+ strbuf_addf(&content, "%s\n", st->readers[1]->name);
+ strbuf_addstr(&content, "garbage\n");
+ strbuf_addf(&table_path, "%s.lock", st->list_file);
+ write_file_buf(table_path.buf, content.buf, content.len);
+ err = rename(table_path.buf, st->list_file);
+ EXPECT_ERR(err);
+
+ err = reftable_stack_reload(st);
+ EXPECT(err == -4);
+ EXPECT(st->merged->readers_len == 2);
+
+ /*
+ * Even though the reload has failed, we should be able to continue
+ * using the iterator.
+ */
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0000"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT_ERR(err);
+ EXPECT(!strcmp(rec.refname, "refs/heads/branch-0001"));
+ err = reftable_iterator_next_ref(&it, &rec);
+ EXPECT(err > 0);
+
+ reftable_ref_record_release(&rec);
+ reftable_iterator_destroy(&it);
+ reftable_stack_destroy(st);
+ strbuf_release(&table_path);
+ strbuf_release(&content);
+ clear_dir(dir);
+}
+
int stack_test_main(int argc, const char *argv[])
{
RUN_TEST(test_empty_add);
@@ -1148,6 +1206,7 @@ int stack_test_main(int argc, const char *argv[])
RUN_TEST(test_reftable_stack_update_index_check);
RUN_TEST(test_reftable_stack_uptodate);
RUN_TEST(test_reftable_stack_read_across_reload);
+ RUN_TEST(test_reftable_stack_reload_with_missing_table);
RUN_TEST(test_suggest_compaction_segment);
RUN_TEST(test_suggest_compaction_segment_nothing);
return 0;
--
2.46.0.164.g477ce5ccd6.dirty
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v2 00/10] reftable: fix reload with active iterators
2024-08-23 14:12 ` [PATCH v2 " Patrick Steinhardt
` (9 preceding siblings ...)
2024-08-23 14:12 ` [PATCH v2 10/10] reftable/stack: fix segfault when reload with reused readers fails Patrick Steinhardt
@ 2024-08-23 16:49 ` Junio C Hamano
10 siblings, 0 replies; 38+ messages in thread
From: Junio C Hamano @ 2024-08-23 16:49 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Jeff King, karthik nayak
Patrick Steinhardt <ps@pks.im> writes:
> Changes compared to v1:
>
> - Remove a spurious change that renamed `init_reader()` to
> `reader_init()`. That function went away in the subsequent commit
> anyway.
>
> - Fix a typo in a commit message.
>
> - Fix some assertions in one of the added tests.
All changes relative to v1 looked sensible to me.
Will replace.
^ permalink raw reply [flat|nested] 38+ messages in thread