* [PATCH 0/3] cache-tree: fix segfaults with invalid cache-trees @ 2024-09-17 7:13 Patrick Steinhardt 2024-09-17 7:13 ` [PATCH 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt ` (3 more replies) 0 siblings, 4 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-09-17 7:13 UTC (permalink / raw) To: git Hi, this small patch series fixes segfaults that can happen when the index has a corrupted cache-tree extension. This scenario is covered by t4058, which documented this segfault. I've noitced that this triggers the leak sanitizer, so I've decided to fix these segfaults now and handle them gracefully. Patrick Patrick Steinhardt (3): cache-tree: refactor verification to return error codes cache-tree: detect mismatching number of index entries unpack-trees: detect mismatching number of cache-tree/index entries cache-tree.c | 102 ++++++++++++++++++++++++++----------- cache-tree.h | 2 +- read-cache.c | 5 +- t/t4058-diff-duplicates.sh | 19 ++++--- unpack-trees.c | 12 +++-- 5 files changed, 97 insertions(+), 43 deletions(-) -- 2.46.0.551.gc5ee8f2d1c.dirty ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/3] cache-tree: refactor verification to return error codes 2024-09-17 7:13 [PATCH 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt @ 2024-09-17 7:13 ` Patrick Steinhardt 2024-09-17 17:05 ` Eric Sunshine 2024-09-17 7:13 ` [PATCH 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt ` (2 subsequent siblings) 3 siblings, 1 reply; 13+ messages in thread From: Patrick Steinhardt @ 2024-09-17 7:13 UTC (permalink / raw) To: git The function `cache_tree_verify()` will `BUG()` whenever it finds that the cache-tree extension of the index is corrupt. The function is thus inherently untestable because the resulting call to `abort()` will be detected by our testing framework and labelled an error. And rightfully so: it shouldn't ever be possible to hit bugs, as they should indicate a programming error rather than corruption of on-disk state. Refactor the function to instead return error codes. This also ensures that the function can be used e.g. by git-fsck(1) without the whole process dying. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- cache-tree.c | 97 +++++++++++++++++++++++++++++++++++--------------- cache-tree.h | 2 +- read-cache.c | 5 +-- unpack-trees.c | 10 ++++-- 4 files changed, 79 insertions(+), 35 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 50610c3f3cb..4228b6fad48 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -2,6 +2,7 @@ #include "git-compat-util.h" #include "environment.h" +#include "gettext.h" #include "hex.h" #include "lockfile.h" #include "tree.h" @@ -864,15 +865,15 @@ int cache_tree_matches_traversal(struct cache_tree *root, return 0; } -static void verify_one_sparse(struct index_state *istate, - struct strbuf *path, - int pos) +static int verify_one_sparse(struct index_state *istate, + struct strbuf *path, + int pos) { struct cache_entry *ce = istate->cache[pos]; - if (!S_ISSPARSEDIR(ce->ce_mode)) - BUG("directory '%s' is present in index, but not sparse", - path->buf); + return error(_("directory '%s' is present in index, but not sparse"), + path->buf); + return 0; } /* @@ -881,6 +882,7 @@ static void verify_one_sparse(struct index_state *istate, * 1 - Restart verification - a call to ensure_full_index() freed the cache * tree that is being verified and verification needs to be restarted from * the new toplevel cache tree. + * -1 - Verification failed. */ static int verify_one(struct repository *r, struct index_state *istate, @@ -890,18 +892,23 @@ static int verify_one(struct repository *r, int i, pos, len = path->len; struct strbuf tree_buf = STRBUF_INIT; struct object_id new_oid; + int ret; for (i = 0; i < it->subtree_nr; i++) { strbuf_addf(path, "%s/", it->down[i]->name); - if (verify_one(r, istate, it->down[i]->cache_tree, path)) - return 1; + ret = verify_one(r, istate, it->down[i]->cache_tree, path); + if (ret) + goto out; + strbuf_setlen(path, len); } if (it->entry_count < 0 || /* no verification on tests (t7003) that replace trees */ - lookup_replace_object(r, &it->oid) != &it->oid) - return 0; + lookup_replace_object(r, &it->oid) != &it->oid) { + ret = 0; + goto out; + } if (path->len) { /* @@ -911,12 +918,14 @@ static int verify_one(struct repository *r, */ int is_sparse = istate->sparse_index; pos = index_name_pos(istate, path->buf, path->len); - if (is_sparse && !istate->sparse_index) - return 1; + if (is_sparse && !istate->sparse_index) { + ret = 1; + goto out; + } if (pos >= 0) { - verify_one_sparse(istate, path, pos); - return 0; + ret = verify_one_sparse(istate, path, pos); + goto out; } pos = -pos - 1; @@ -934,16 +943,23 @@ static int verify_one(struct repository *r, unsigned mode; int entlen; - if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) - BUG("%s with flags 0x%x should not be in cache-tree", - ce->name, ce->ce_flags); + if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) { + ret = error(_("%s with flags 0x%x should not be in cache-tree"), + ce->name, ce->ce_flags); + goto out; + } + name = ce->name + path->len; slash = strchr(name, '/'); if (slash) { entlen = slash - name; + sub = find_subtree(it, ce->name + path->len, entlen, 0); - if (!sub || sub->cache_tree->entry_count < 0) - BUG("bad subtree '%.*s'", entlen, name); + if (!sub || sub->cache_tree->entry_count < 0) { + ret = error(_("bad subtree '%.*s'"), entlen, name); + goto out; + } + oid = &sub->cache_tree->oid; mode = S_IFDIR; i += sub->cache_tree->entry_count; @@ -956,27 +972,50 @@ static int verify_one(struct repository *r, strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0'); strbuf_add(&tree_buf, oid->hash, r->hash_algo->rawsz); } + hash_object_file(r->hash_algo, tree_buf.buf, tree_buf.len, OBJ_TREE, &new_oid); - if (!oideq(&new_oid, &it->oid)) - BUG("cache-tree for path %.*s does not match. " - "Expected %s got %s", len, path->buf, - oid_to_hex(&new_oid), oid_to_hex(&it->oid)); + + if (!oideq(&new_oid, &it->oid)) { + ret = error(_("cache-tree for path %.*s does not match. " + "Expected %s got %s"), len, path->buf, + oid_to_hex(&new_oid), oid_to_hex(&it->oid)); + goto out; + } + + ret = 0; +out: strbuf_setlen(path, len); strbuf_release(&tree_buf); - return 0; + return ret; } -void cache_tree_verify(struct repository *r, struct index_state *istate) +int cache_tree_verify(struct repository *r, struct index_state *istate) { struct strbuf path = STRBUF_INIT; + int ret; - if (!istate->cache_tree) - return; - if (verify_one(r, istate, istate->cache_tree, &path)) { + if (!istate->cache_tree) { + ret = 0; + goto out; + } + + ret = verify_one(r, istate, istate->cache_tree, &path); + if (ret < 0) + goto out; + if (ret > 0) { strbuf_reset(&path); - if (verify_one(r, istate, istate->cache_tree, &path)) + + ret = verify_one(r, istate, istate->cache_tree, &path); + if (ret < 0) + goto out; + if (ret > 0) BUG("ensure_full_index() called twice while verifying cache tree"); } + + ret = 0; + +out: strbuf_release(&path); + return ret; } diff --git a/cache-tree.h b/cache-tree.h index faae88be63c..b82c4963e7c 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -33,7 +33,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); int cache_tree_fully_valid(struct cache_tree *); int cache_tree_update(struct index_state *, int); -void cache_tree_verify(struct repository *, struct index_state *); +int cache_tree_verify(struct repository *, struct index_state *); /* bitmasks to write_index_as_tree flags */ #define WRITE_TREE_MISSING_OK 1 diff --git a/read-cache.c b/read-cache.c index 4e67dc182e7..d72a3266b6f 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3331,8 +3331,9 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock, int new_shared_index, ret, test_split_index_env; struct split_index *si = istate->split_index; - if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) - cache_tree_verify(the_repository, istate); + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) && + cache_tree_verify(the_repository, istate) < 0) + return -1; if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) { if (flags & COMMIT_LOCK) diff --git a/unpack-trees.c b/unpack-trees.c index 9a55cb62040..21cc197d471 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -2070,9 +2070,13 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (o->dst_index) { move_index_extensions(&o->internal.result, o->src_index); if (!ret) { - if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) - cache_tree_verify(the_repository, - &o->internal.result); + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) && + cache_tree_verify(the_repository, + &o->internal.result) < 0) { + ret = -1; + goto done; + } + if (!o->skip_cache_tree_update && !cache_tree_fully_valid(o->internal.result.cache_tree)) cache_tree_update(&o->internal.result, -- 2.46.0.551.gc5ee8f2d1c.dirty ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] cache-tree: refactor verification to return error codes 2024-09-17 7:13 ` [PATCH 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt @ 2024-09-17 17:05 ` Eric Sunshine 2024-09-18 5:11 ` Patrick Steinhardt 0 siblings, 1 reply; 13+ messages in thread From: Eric Sunshine @ 2024-09-17 17:05 UTC (permalink / raw) To: Patrick Steinhardt; +Cc: git On Tue, Sep 17, 2024 at 3:13 AM Patrick Steinhardt <ps@pks.im> wrote: > The function `cache_tree_verify()` will `BUG()` whenever it finds that > the cache-tree extension of the index is corrupt. The function is thus > inherently untestable because the resulting call to `abort()` will be > detected by our testing framework and labelled an error. And rightfully > so: it shouldn't ever be possible to hit bugs, as they should indicate a > programming error rather than corruption of on-disk state. > > Refactor the function to instead return error codes. This also ensures > that the function can be used e.g. by git-fsck(1) without the whole > process dying. Makes sense. > Signed-off-by: Patrick Steinhardt <ps@pks.im> > --- > diff --git a/cache-tree.c b/cache-tree.c > @@ -890,18 +892,23 @@ static int verify_one(struct repository *r, > struct strbuf tree_buf = STRBUF_INIT; > for (i = 0; i < it->subtree_nr; i++) { > strbuf_addf(path, "%s/", it->down[i]->name); > - if (verify_one(r, istate, it->down[i]->cache_tree, path)) > - return 1; > + ret = verify_one(r, istate, it->down[i]->cache_tree, path); > + if (ret) > + goto out; Assuming I am understanding correctly that the original code was leaking the strbuf by returning early, I was surprised that the commit message didn't mention that the patch is also fixing the leak. (Probably not worth a reroll, though.) ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/3] cache-tree: refactor verification to return error codes 2024-09-17 17:05 ` Eric Sunshine @ 2024-09-18 5:11 ` Patrick Steinhardt 0 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-09-18 5:11 UTC (permalink / raw) To: Eric Sunshine; +Cc: git On Tue, Sep 17, 2024 at 01:05:50PM -0400, Eric Sunshine wrote: > On Tue, Sep 17, 2024 at 3:13 AM Patrick Steinhardt <ps@pks.im> wrote: > > Signed-off-by: Patrick Steinhardt <ps@pks.im> > > --- > > diff --git a/cache-tree.c b/cache-tree.c > > @@ -890,18 +892,23 @@ static int verify_one(struct repository *r, > > struct strbuf tree_buf = STRBUF_INIT; > > for (i = 0; i < it->subtree_nr; i++) { > > strbuf_addf(path, "%s/", it->down[i]->name); > > - if (verify_one(r, istate, it->down[i]->cache_tree, path)) > > - return 1; > > + ret = verify_one(r, istate, it->down[i]->cache_tree, path); > > + if (ret) > > + goto out; > > Assuming I am understanding correctly that the original code was > leaking the strbuf by returning early, I was surprised that the commit > message didn't mention that the patch is also fixing the leak. > (Probably not worth a reroll, though.) It just wasn't my main motivation here, so I forgot to mention it. Added it now though, so it's included in case I'll have to reroll. Thanks! Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 2/3] cache-tree: detect mismatching number of index entries 2024-09-17 7:13 [PATCH 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 2024-09-17 7:13 ` [PATCH 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt @ 2024-09-17 7:13 ` Patrick Steinhardt 2024-09-19 1:35 ` Junio C Hamano 2024-09-17 7:13 ` [PATCH 3/3] unpack-trees: detect mismatching number of cache-tree/index entries Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 3 siblings, 1 reply; 13+ messages in thread From: Patrick Steinhardt @ 2024-09-17 7:13 UTC (permalink / raw) To: git In t4058 we have some tests that exercise git-read-tree(1) when used with a tree that contains duplicate entries. While the expectation is that we fail, we ideally should fail gracefully without a segfault. But that is not the case: we never check that the number of entries in the cache-tree is less than or equal to the number of entries in the index. This can lead to an out-of-bounds read as we unconditionally access `istate->cache[idx]`, where `idx` is controlled by the number of cache-tree entries and the current position therein. The result is a segfault. Fix this segfault by adding a sanity check for the number of index entries before dereferencing them. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- cache-tree.c | 5 +++++ t/t4058-diff-duplicates.sh | 12 ++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 4228b6fad48..1e625673086 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -933,6 +933,11 @@ static int verify_one(struct repository *r, pos = 0; } + if (it->entry_count + pos > istate->cache_nr) { + ret = error(_("corrupted cache-tree has entries not present in index")); + goto out; + } + i = 0; while (i < it->entry_count) { struct cache_entry *ce = istate->cache[pos + i]; diff --git a/t/t4058-diff-duplicates.sh b/t/t4058-diff-duplicates.sh index 2501c89c1c9..3f602adb055 100755 --- a/t/t4058-diff-duplicates.sh +++ b/t/t4058-diff-duplicates.sh @@ -132,15 +132,15 @@ test_expect_success 'create a few commits' ' rm commit_id up final ' -test_expect_failure 'git read-tree does not segfault' ' - test_when_finished rm .git/index.lock && - test_might_fail git read-tree --reset base +test_expect_success 'git read-tree does not segfault' ' + test_must_fail git read-tree --reset base 2>err && + test_grep "error: corrupted cache-tree has entries not present in index" err ' -test_expect_failure 'reset --hard does not segfault' ' - test_when_finished rm .git/index.lock && +test_expect_success 'reset --hard does not segfault' ' git checkout base && - test_might_fail git reset --hard + test_must_fail git reset --hard 2>err && + test_grep "error: corrupted cache-tree has entries not present in index" err ' test_expect_failure 'git diff HEAD does not segfault' ' -- 2.46.0.551.gc5ee8f2d1c.dirty ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] cache-tree: detect mismatching number of index entries 2024-09-17 7:13 ` [PATCH 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt @ 2024-09-19 1:35 ` Junio C Hamano 2024-09-24 6:48 ` Patrick Steinhardt 0 siblings, 1 reply; 13+ messages in thread From: Junio C Hamano @ 2024-09-19 1:35 UTC (permalink / raw) To: Patrick Steinhardt; +Cc: git Patrick Steinhardt <ps@pks.im> writes: > + if (it->entry_count + pos > istate->cache_nr) { > + ret = error(_("corrupted cache-tree has entries not present in index")); > + goto out; > + } Is it a safe assumption that the if() condition always indicates an error? When sparse-index is in effect, istate->cache_nr may be a number that is smaller than the true number of paths in the index (because all paths under a subdirectory we are not interested in are folded into a single tree-ish entry), and I am not sure how it should interact with it->entry_count (i.e. the number of paths under the current directory we are looking at, which obviously cannot be a sparsified entry) and pos (i.e. the index into active_cache[] that represend the first path under the current directory)? I guess as long as "it" is not folded, it does not matter how other paths from different directories in active_cache[] are sparsified or expanded, as long as "pos" keeps track of the current position correctly. > diff --git a/t/t4058-diff-duplicates.sh b/t/t4058-diff-duplicates.sh > index 2501c89c1c9..3f602adb055 100755 > --- a/t/t4058-diff-duplicates.sh > +++ b/t/t4058-diff-duplicates.sh > @@ -132,15 +132,15 @@ test_expect_success 'create a few commits' ' > rm commit_id up final > ' > > -test_expect_failure 'git read-tree does not segfault' ' > - test_when_finished rm .git/index.lock && > - test_might_fail git read-tree --reset base > +test_expect_success 'git read-tree does not segfault' ' > + test_must_fail git read-tree --reset base 2>err && > + test_grep "error: corrupted cache-tree has entries not present in index" err > ' Very good. test_might_fail is a sign of trouble, and this gives us a lot more predictability. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] cache-tree: detect mismatching number of index entries 2024-09-19 1:35 ` Junio C Hamano @ 2024-09-24 6:48 ` Patrick Steinhardt 2024-09-24 17:01 ` Junio C Hamano 0 siblings, 1 reply; 13+ messages in thread From: Patrick Steinhardt @ 2024-09-24 6:48 UTC (permalink / raw) To: Junio C Hamano; +Cc: git On Wed, Sep 18, 2024 at 06:35:35PM -0700, Junio C Hamano wrote: > Patrick Steinhardt <ps@pks.im> writes: > > > + if (it->entry_count + pos > istate->cache_nr) { > > + ret = error(_("corrupted cache-tree has entries not present in index")); > > + goto out; > > + } > > Is it a safe assumption that the if() condition always indicates an > error? When sparse-index is in effect, istate->cache_nr may be a > number that is smaller than the true number of paths in the index > (because all paths under a subdirectory we are not interested in are > folded into a single tree-ish entry), and I am not sure how it > should interact with it->entry_count (i.e. the number of paths under > the current directory we are looking at, which obviously cannot be a > sparsified entry) and pos (i.e. the index into active_cache[] that > represend the first path under the current directory)? > > I guess as long as "it" is not folded, it does not matter how other > paths from different directories in active_cache[] are sparsified or > expanded, as long as "pos" keeps track of the current position > correctly. It seems like we end up calling `ensure_full_index()` for a sparse index, which does cause us to signal to the caller that they should restart verification. So for all I understand, this function shouldn't act on a sparsely-populated index. But I cannot see how it could lead to anything sensible when the added condition is violated because the first thing we do in the loop is this: struct cache_entry *ce = istate->cache[pos + i]; And before we do anything else, we dereference that pointer. So if the condition doesn't hold we _will_ get an out-of-bounds read of the cache array and act on the garbage data. And that causes the observed segfault on my machine and in the test. So I think that ensuring this property is always the right thing to do. But I wouldn't be surprised if overall this code could require more love to make it behave sanely in all scenarios. It certainly feels somewhat fragile to me. Patrick ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] cache-tree: detect mismatching number of index entries 2024-09-24 6:48 ` Patrick Steinhardt @ 2024-09-24 17:01 ` Junio C Hamano 0 siblings, 0 replies; 13+ messages in thread From: Junio C Hamano @ 2024-09-24 17:01 UTC (permalink / raw) To: Patrick Steinhardt; +Cc: git Patrick Steinhardt <ps@pks.im> writes: >> I guess as long as "it" is not folded, it does not matter how other >> paths from different directories in active_cache[] are sparsified or >> expanded, as long as "pos" keeps track of the current position >> correctly. > > It seems like we end up calling `ensure_full_index()` for a sparse > index, which does cause us to signal to the caller that they should > restart verification. So for all I understand, this function shouldn't > act on a sparsely-populated index. OK. That sounds sensible and safe. ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/3] unpack-trees: detect mismatching number of cache-tree/index entries 2024-09-17 7:13 [PATCH 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 2024-09-17 7:13 ` [PATCH 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt 2024-09-17 7:13 ` [PATCH 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt @ 2024-09-17 7:13 ` Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 3 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-09-17 7:13 UTC (permalink / raw) To: git Same as the preceding commit, we unconditionally dereference the index's cache entries depending on the number of cache-tree entries, which can lead to a segfault when the cache-tree is corrupted. Fix this bug. This also makes t4058 pass with the leak sanitizer enabled. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- t/t4058-diff-duplicates.sh | 7 +++++-- unpack-trees.c | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/t/t4058-diff-duplicates.sh b/t/t4058-diff-duplicates.sh index 3f602adb055..18e5ac88c31 100755 --- a/t/t4058-diff-duplicates.sh +++ b/t/t4058-diff-duplicates.sh @@ -10,6 +10,8 @@ # that the diff output isn't wildly unreasonable. test_description='test tree diff when trees have duplicate entries' + +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh # make_tree_entry <mode> <mode> <sha1> @@ -143,11 +145,12 @@ test_expect_success 'reset --hard does not segfault' ' test_grep "error: corrupted cache-tree has entries not present in index" err ' -test_expect_failure 'git diff HEAD does not segfault' ' +test_expect_success 'git diff HEAD does not segfault' ' git checkout base && GIT_TEST_CHECK_CACHE_TREE=false && git reset --hard && - test_might_fail git diff HEAD + test_must_fail git diff HEAD 2>err && + test_grep "error: corrupted cache-tree has entries not present in index" err ' test_expect_failure 'can switch to another branch when status is empty' ' diff --git a/unpack-trees.c b/unpack-trees.c index 21cc197d471..e10a9d12091 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -808,6 +808,8 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names, if (!o->merge) BUG("We need cache-tree to do this optimization"); + if (nr_entries + pos > o->src_index->cache_nr) + return error(_("corrupted cache-tree has entries not present in index")); /* * Do what unpack_callback() and unpack_single_entry() normally -- 2.46.0.551.gc5ee8f2d1c.dirty ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees 2024-09-17 7:13 [PATCH 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt ` (2 preceding siblings ...) 2024-09-17 7:13 ` [PATCH 3/3] unpack-trees: detect mismatching number of cache-tree/index entries Patrick Steinhardt @ 2024-10-07 4:38 ` Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt ` (2 more replies) 3 siblings, 3 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-10-07 4:38 UTC (permalink / raw) To: git; +Cc: Eric Sunshine, Junio C Hamano Hi, this is the second version of my patch series that fixes segfaults when the index has a corrupted cache-tree extension. I completely forgot about this series, and it seems to have slipped the radar, until I rediscovered some of the segfaults while doing the last couple of leak fixes. So I decided to just resend it with Eric's comment addressed, which boils down to a single clarification of one of the commit messages. Thanks! Patrick Patrick Steinhardt (3): cache-tree: refactor verification to return error codes cache-tree: detect mismatching number of index entries unpack-trees: detect mismatching number of cache-tree/index entries cache-tree.c | 102 ++++++++++++++++++++++++++----------- cache-tree.h | 2 +- read-cache.c | 5 +- t/t4058-diff-duplicates.sh | 19 ++++--- unpack-trees.c | 12 +++-- 5 files changed, 97 insertions(+), 43 deletions(-) Range-diff against v1: 1: 413faa2b81 ! 1: df5a2d0dbc cache-tree: refactor verification to return error codes @@ Commit message Refactor the function to instead return error codes. This also ensures that the function can be used e.g. by git-fsck(1) without the whole - process dying. + process dying. Furthermore, this refactoring plugs some memory leaks + when returning early by creating a common exit path. Signed-off-by: Patrick Steinhardt <ps@pks.im> 2: 4bdcc43518 = 2: d63087c53c cache-tree: detect mismatching number of index entries 3: fbffeeb6f1 = 3: 5e578c1f41 unpack-trees: detect mismatching number of cache-tree/index entries base-commit: 3969d78396e707c5a900dd5e15c365c54bef0283 -- 2.47.0.rc0.dirty ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 1/3] cache-tree: refactor verification to return error codes 2024-10-07 4:38 ` [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt @ 2024-10-07 4:38 ` Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 3/3] unpack-trees: detect mismatching number of cache-tree/index entries Patrick Steinhardt 2 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-10-07 4:38 UTC (permalink / raw) To: git; +Cc: Eric Sunshine, Junio C Hamano The function `cache_tree_verify()` will `BUG()` whenever it finds that the cache-tree extension of the index is corrupt. The function is thus inherently untestable because the resulting call to `abort()` will be detected by our testing framework and labelled an error. And rightfully so: it shouldn't ever be possible to hit bugs, as they should indicate a programming error rather than corruption of on-disk state. Refactor the function to instead return error codes. This also ensures that the function can be used e.g. by git-fsck(1) without the whole process dying. Furthermore, this refactoring plugs some memory leaks when returning early by creating a common exit path. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- cache-tree.c | 97 +++++++++++++++++++++++++++++++++++--------------- cache-tree.h | 2 +- read-cache.c | 5 +-- unpack-trees.c | 10 ++++-- 4 files changed, 79 insertions(+), 35 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 50610c3f3c..4228b6fad4 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -2,6 +2,7 @@ #include "git-compat-util.h" #include "environment.h" +#include "gettext.h" #include "hex.h" #include "lockfile.h" #include "tree.h" @@ -864,15 +865,15 @@ int cache_tree_matches_traversal(struct cache_tree *root, return 0; } -static void verify_one_sparse(struct index_state *istate, - struct strbuf *path, - int pos) +static int verify_one_sparse(struct index_state *istate, + struct strbuf *path, + int pos) { struct cache_entry *ce = istate->cache[pos]; - if (!S_ISSPARSEDIR(ce->ce_mode)) - BUG("directory '%s' is present in index, but not sparse", - path->buf); + return error(_("directory '%s' is present in index, but not sparse"), + path->buf); + return 0; } /* @@ -881,6 +882,7 @@ static void verify_one_sparse(struct index_state *istate, * 1 - Restart verification - a call to ensure_full_index() freed the cache * tree that is being verified and verification needs to be restarted from * the new toplevel cache tree. + * -1 - Verification failed. */ static int verify_one(struct repository *r, struct index_state *istate, @@ -890,18 +892,23 @@ static int verify_one(struct repository *r, int i, pos, len = path->len; struct strbuf tree_buf = STRBUF_INIT; struct object_id new_oid; + int ret; for (i = 0; i < it->subtree_nr; i++) { strbuf_addf(path, "%s/", it->down[i]->name); - if (verify_one(r, istate, it->down[i]->cache_tree, path)) - return 1; + ret = verify_one(r, istate, it->down[i]->cache_tree, path); + if (ret) + goto out; + strbuf_setlen(path, len); } if (it->entry_count < 0 || /* no verification on tests (t7003) that replace trees */ - lookup_replace_object(r, &it->oid) != &it->oid) - return 0; + lookup_replace_object(r, &it->oid) != &it->oid) { + ret = 0; + goto out; + } if (path->len) { /* @@ -911,12 +918,14 @@ static int verify_one(struct repository *r, */ int is_sparse = istate->sparse_index; pos = index_name_pos(istate, path->buf, path->len); - if (is_sparse && !istate->sparse_index) - return 1; + if (is_sparse && !istate->sparse_index) { + ret = 1; + goto out; + } if (pos >= 0) { - verify_one_sparse(istate, path, pos); - return 0; + ret = verify_one_sparse(istate, path, pos); + goto out; } pos = -pos - 1; @@ -934,16 +943,23 @@ static int verify_one(struct repository *r, unsigned mode; int entlen; - if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) - BUG("%s with flags 0x%x should not be in cache-tree", - ce->name, ce->ce_flags); + if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) { + ret = error(_("%s with flags 0x%x should not be in cache-tree"), + ce->name, ce->ce_flags); + goto out; + } + name = ce->name + path->len; slash = strchr(name, '/'); if (slash) { entlen = slash - name; + sub = find_subtree(it, ce->name + path->len, entlen, 0); - if (!sub || sub->cache_tree->entry_count < 0) - BUG("bad subtree '%.*s'", entlen, name); + if (!sub || sub->cache_tree->entry_count < 0) { + ret = error(_("bad subtree '%.*s'"), entlen, name); + goto out; + } + oid = &sub->cache_tree->oid; mode = S_IFDIR; i += sub->cache_tree->entry_count; @@ -956,27 +972,50 @@ static int verify_one(struct repository *r, strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0'); strbuf_add(&tree_buf, oid->hash, r->hash_algo->rawsz); } + hash_object_file(r->hash_algo, tree_buf.buf, tree_buf.len, OBJ_TREE, &new_oid); - if (!oideq(&new_oid, &it->oid)) - BUG("cache-tree for path %.*s does not match. " - "Expected %s got %s", len, path->buf, - oid_to_hex(&new_oid), oid_to_hex(&it->oid)); + + if (!oideq(&new_oid, &it->oid)) { + ret = error(_("cache-tree for path %.*s does not match. " + "Expected %s got %s"), len, path->buf, + oid_to_hex(&new_oid), oid_to_hex(&it->oid)); + goto out; + } + + ret = 0; +out: strbuf_setlen(path, len); strbuf_release(&tree_buf); - return 0; + return ret; } -void cache_tree_verify(struct repository *r, struct index_state *istate) +int cache_tree_verify(struct repository *r, struct index_state *istate) { struct strbuf path = STRBUF_INIT; + int ret; - if (!istate->cache_tree) - return; - if (verify_one(r, istate, istate->cache_tree, &path)) { + if (!istate->cache_tree) { + ret = 0; + goto out; + } + + ret = verify_one(r, istate, istate->cache_tree, &path); + if (ret < 0) + goto out; + if (ret > 0) { strbuf_reset(&path); - if (verify_one(r, istate, istate->cache_tree, &path)) + + ret = verify_one(r, istate, istate->cache_tree, &path); + if (ret < 0) + goto out; + if (ret > 0) BUG("ensure_full_index() called twice while verifying cache tree"); } + + ret = 0; + +out: strbuf_release(&path); + return ret; } diff --git a/cache-tree.h b/cache-tree.h index faae88be63..b82c4963e7 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -33,7 +33,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); int cache_tree_fully_valid(struct cache_tree *); int cache_tree_update(struct index_state *, int); -void cache_tree_verify(struct repository *, struct index_state *); +int cache_tree_verify(struct repository *, struct index_state *); /* bitmasks to write_index_as_tree flags */ #define WRITE_TREE_MISSING_OK 1 diff --git a/read-cache.c b/read-cache.c index 4e67dc182e..d72a3266b6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3331,8 +3331,9 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock, int new_shared_index, ret, test_split_index_env; struct split_index *si = istate->split_index; - if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) - cache_tree_verify(the_repository, istate); + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) && + cache_tree_verify(the_repository, istate) < 0) + return -1; if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) { if (flags & COMMIT_LOCK) diff --git a/unpack-trees.c b/unpack-trees.c index 9a55cb6204..21cc197d47 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -2070,9 +2070,13 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (o->dst_index) { move_index_extensions(&o->internal.result, o->src_index); if (!ret) { - if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) - cache_tree_verify(the_repository, - &o->internal.result); + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0) && + cache_tree_verify(the_repository, + &o->internal.result) < 0) { + ret = -1; + goto done; + } + if (!o->skip_cache_tree_update && !cache_tree_fully_valid(o->internal.result.cache_tree)) cache_tree_update(&o->internal.result, -- 2.47.0.rc0.dirty ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 2/3] cache-tree: detect mismatching number of index entries 2024-10-07 4:38 ` [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt @ 2024-10-07 4:38 ` Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 3/3] unpack-trees: detect mismatching number of cache-tree/index entries Patrick Steinhardt 2 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-10-07 4:38 UTC (permalink / raw) To: git; +Cc: Eric Sunshine, Junio C Hamano In t4058 we have some tests that exercise git-read-tree(1) when used with a tree that contains duplicate entries. While the expectation is that we fail, we ideally should fail gracefully without a segfault. But that is not the case: we never check that the number of entries in the cache-tree is less than or equal to the number of entries in the index. This can lead to an out-of-bounds read as we unconditionally access `istate->cache[idx]`, where `idx` is controlled by the number of cache-tree entries and the current position therein. The result is a segfault. Fix this segfault by adding a sanity check for the number of index entries before dereferencing them. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- cache-tree.c | 5 +++++ t/t4058-diff-duplicates.sh | 12 ++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 4228b6fad4..1e62567308 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -933,6 +933,11 @@ static int verify_one(struct repository *r, pos = 0; } + if (it->entry_count + pos > istate->cache_nr) { + ret = error(_("corrupted cache-tree has entries not present in index")); + goto out; + } + i = 0; while (i < it->entry_count) { struct cache_entry *ce = istate->cache[pos + i]; diff --git a/t/t4058-diff-duplicates.sh b/t/t4058-diff-duplicates.sh index 2501c89c1c..3f602adb05 100755 --- a/t/t4058-diff-duplicates.sh +++ b/t/t4058-diff-duplicates.sh @@ -132,15 +132,15 @@ test_expect_success 'create a few commits' ' rm commit_id up final ' -test_expect_failure 'git read-tree does not segfault' ' - test_when_finished rm .git/index.lock && - test_might_fail git read-tree --reset base +test_expect_success 'git read-tree does not segfault' ' + test_must_fail git read-tree --reset base 2>err && + test_grep "error: corrupted cache-tree has entries not present in index" err ' -test_expect_failure 'reset --hard does not segfault' ' - test_when_finished rm .git/index.lock && +test_expect_success 'reset --hard does not segfault' ' git checkout base && - test_might_fail git reset --hard + test_must_fail git reset --hard 2>err && + test_grep "error: corrupted cache-tree has entries not present in index" err ' test_expect_failure 'git diff HEAD does not segfault' ' -- 2.47.0.rc0.dirty ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 3/3] unpack-trees: detect mismatching number of cache-tree/index entries 2024-10-07 4:38 ` [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt @ 2024-10-07 4:38 ` Patrick Steinhardt 2 siblings, 0 replies; 13+ messages in thread From: Patrick Steinhardt @ 2024-10-07 4:38 UTC (permalink / raw) To: git; +Cc: Eric Sunshine, Junio C Hamano Same as the preceding commit, we unconditionally dereference the index's cache entries depending on the number of cache-tree entries, which can lead to a segfault when the cache-tree is corrupted. Fix this bug. This also makes t4058 pass with the leak sanitizer enabled. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- t/t4058-diff-duplicates.sh | 7 +++++-- unpack-trees.c | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/t/t4058-diff-duplicates.sh b/t/t4058-diff-duplicates.sh index 3f602adb05..18e5ac88c3 100755 --- a/t/t4058-diff-duplicates.sh +++ b/t/t4058-diff-duplicates.sh @@ -10,6 +10,8 @@ # that the diff output isn't wildly unreasonable. test_description='test tree diff when trees have duplicate entries' + +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh # make_tree_entry <mode> <mode> <sha1> @@ -143,11 +145,12 @@ test_expect_success 'reset --hard does not segfault' ' test_grep "error: corrupted cache-tree has entries not present in index" err ' -test_expect_failure 'git diff HEAD does not segfault' ' +test_expect_success 'git diff HEAD does not segfault' ' git checkout base && GIT_TEST_CHECK_CACHE_TREE=false && git reset --hard && - test_might_fail git diff HEAD + test_must_fail git diff HEAD 2>err && + test_grep "error: corrupted cache-tree has entries not present in index" err ' test_expect_failure 'can switch to another branch when status is empty' ' diff --git a/unpack-trees.c b/unpack-trees.c index 21cc197d47..e10a9d1209 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -808,6 +808,8 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names, if (!o->merge) BUG("We need cache-tree to do this optimization"); + if (nr_entries + pos > o->src_index->cache_nr) + return error(_("corrupted cache-tree has entries not present in index")); /* * Do what unpack_callback() and unpack_single_entry() normally -- 2.47.0.rc0.dirty ^ permalink raw reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2024-10-07 4:38 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2024-09-17 7:13 [PATCH 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 2024-09-17 7:13 ` [PATCH 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt 2024-09-17 17:05 ` Eric Sunshine 2024-09-18 5:11 ` Patrick Steinhardt 2024-09-17 7:13 ` [PATCH 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt 2024-09-19 1:35 ` Junio C Hamano 2024-09-24 6:48 ` Patrick Steinhardt 2024-09-24 17:01 ` Junio C Hamano 2024-09-17 7:13 ` [PATCH 3/3] unpack-trees: detect mismatching number of cache-tree/index entries Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 0/3] cache-tree: fix segfaults with invalid cache-trees Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 1/3] cache-tree: refactor verification to return error codes Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 2/3] cache-tree: detect mismatching number of index entries Patrick Steinhardt 2024-10-07 4:38 ` [PATCH v2 3/3] unpack-trees: detect mismatching number of cache-tree/index entries Patrick Steinhardt
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).