git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH 11/11] builtin/maintenance: fix locking race when handling "gc" task
@ 2025-05-30 12:55 Ben Knoble
  2025-05-30 14:05 ` Patrick Steinhardt
  0 siblings, 1 reply; 6+ messages in thread
From: Ben Knoble @ 2025-05-30 12:55 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, Yonatan Roth, david asraf



> Le 27 mai 2025 à 11:29, Patrick Steinhardt <ps@pks.im> a écrit :
> 
> The "gc" task has a similar locking race as the one that we have fixed
> for the "pack-refs" and "reflog-expire" tasks in preceding commits. Fix
> this by splitting up the logic of the "gc" task:
> 
> - Before detaching we execute `gc_before_repack()`, which contains the
>   logic that git-gc(1) itself would execute before detaching.
> 
> - After detaching we spawn git-gc(1), but with a new hidden flag that
>   suppresses calling `gc_before_repack()`.
> 
> Like this we have roughly the same logic as git-gc(1) itself and know to
> repack refs and reflogs before detaching, thus fixing the race.
> 
> Note that `gc_before_repack()` is renamed to `gc_before_detach()` to
> better reflect what this function does.
> 
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/gc.c           | 39 ++++++++++++++++++++++++++-------------
> t/t7900-maintenance.sh | 12 ++++++------
> 2 files changed, 32 insertions(+), 19 deletions(-)
> 
> diff --git a/builtin/gc.c b/builtin/gc.c
> index 174357b9c25..2cf61efcee9 100644
> --- a/builtin/gc.c
> +++ b/builtin/gc.c
> @@ -816,7 +816,7 @@ static int report_last_gc_error(void)
>   return ret;
> }
> 
> -static int gc_before_repack(struct maintenance_run_opts *opts,
> +static int gc_before_detach(struct maintenance_run_opts *opts,
>               struct gc_config *cfg)
> {
>   if (cfg->pack_refs && maintenance_task_pack_refs(opts, cfg))
> @@ -837,6 +837,7 @@ int cmd_gc(int argc,
>   pid_t pid;
>   int daemonized = 0;
>   int keep_largest_pack = -1;
> +    int skip_maintenance_before_detach = 0;
>   timestamp_t dummy;
>   struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT;
>   struct gc_config cfg = GC_CONFIG_INIT;
> @@ -869,6 +870,8 @@ int cmd_gc(int argc,
>            N_("repack all other packs except the largest pack")),
>       OPT_STRING(0, "expire-to", &cfg.repack_expire_to, N_("dir"),
>              N_("pack prefix to store a pack containing pruned objects")),
> +        OPT_HIDDEN_BOOL(0, "skip-maintenance-before-detach", &skip_maintenance_before_detach,
> +               N_("skip maintenance steps typically done before detaching")),
>       OPT_END()
>   };
> 
> @@ -952,14 +955,16 @@ int cmd_gc(int argc,
>           goto out;
>       }
> 
> -        if (lock_repo_for_gc(force, &pid)) {
> -            ret = 0;
> -            goto out;
> -        }
> +        if (!skip_maintenance_before_detach) {
> +            if (lock_repo_for_gc(force, &pid)) {
> +                ret = 0;
> +                goto out;
> +            }
> 
> -        if (gc_before_repack(&opts, &cfg) < 0)
> -            exit(127);
> -        delete_tempfile(&pidfile);
> +            if (gc_before_detach(&opts, &cfg) < 0)
> +                exit(127);
> +            delete_tempfile(&pidfile);
> +        }
> 
>       /*
>        * failure to daemonize is ok, we'll continue
> @@ -988,8 +993,8 @@ int cmd_gc(int argc,
>       free(path);
>   }
> 
> -    if (opts.detach <= 0)
> -        gc_before_repack(&opts, &cfg);
> +    if (opts.detach <= 0 && !skip_maintenance_before_detach)
> +        gc_before_detach(&opts, &cfg);
> 
>   if (!repository_format_precious_objects) {
>       struct child_process repack_cmd = CHILD_PROCESS_INIT;
> @@ -1225,8 +1230,14 @@ static int maintenance_task_prefetch(struct maintenance_run_opts *opts,
>   return 0;
> }
> 
> -static int maintenance_task_gc(struct maintenance_run_opts *opts,
> -                   struct gc_config *cfg UNUSED)
> +static int maintenance_task_gc_before_detach(struct maintenance_run_opts *opts,
> +                         struct gc_config *cfg)
> +{
> +    return gc_before_detach(opts, cfg);
> +}
> +
> +static int maintenance_task_gc_after_detach(struct maintenance_run_opts *opts,
> +                        struct gc_config *cfg UNUSED)
> {
>   struct child_process child = CHILD_PROCESS_INIT;
> 
> @@ -1240,6 +1251,7 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts,
>   else
>       strvec_push(&child.args, "--no-quiet");
>   strvec_push(&child.args, "--no-detach");
> +    strvec_push(&child.args, "--skip-maintenance-before-detach");

I suspect this would be more obvious to me if I had the manual available right now, but if we are not detaching (« --no-detach ») why do we need to skip something before detaching (that presumably won’t happen)?

> 
>   return run_command(&child);
> }
> @@ -1561,7 +1573,8 @@ static const struct maintenance_task tasks[] = {
>   },
>   [TASK_GC] = {
>       .name = "gc",
> -        .after_detach = maintenance_task_gc,
> +        .before_detach = maintenance_task_gc_before_detach,
> +        .after_detach = maintenance_task_gc_after_detach,
>       .auto_condition = need_to_gc,
>   },
>   [TASK_COMMIT_GRAPH] = {
> diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
> index 1ada5246606..e09a36ab021 100755
> --- a/t/t7900-maintenance.sh
> +++ b/t/t7900-maintenance.sh
> @@ -49,9 +49,9 @@ test_expect_success 'run [--auto|--quiet]' '
>       git maintenance run --auto 2>/dev/null &&
>   GIT_TRACE2_EVENT="$(pwd)/run-no-quiet.txt" \
>       git maintenance run --no-quiet 2>/dev/null &&
> -    test_subcommand git gc --quiet --no-detach <run-no-auto.txt &&
> -    test_subcommand ! git gc --auto --quiet --no-detach <run-auto.txt &&
> -    test_subcommand git gc --no-quiet --no-detach <run-no-quiet.txt
> +    test_subcommand git gc --quiet --no-detach --skip-maintenance-before-detach <run-no-auto.txt &&
> +    test_subcommand ! git gc --auto --quiet --no-detach --skip-maintenance-before-detach <run-auto.txt &&
> +    test_subcommand git gc --no-quiet --no-detach --skip-maintenance-before-detach <run-no-quiet.txt
> '
> 
> test_expect_success 'maintenance.auto config option' '
> @@ -154,9 +154,9 @@ test_expect_success 'run --task=<task>' '
>       git maintenance run --task=commit-graph 2>/dev/null &&
>   GIT_TRACE2_EVENT="$(pwd)/run-both.txt" \
>       git maintenance run --task=commit-graph --task=gc 2>/dev/null &&
> -    test_subcommand ! git gc --quiet --no-detach <run-commit-graph.txt &&
> -    test_subcommand git gc --quiet --no-detach <run-gc.txt &&
> -    test_subcommand git gc --quiet --no-detach <run-both.txt &&
> +    test_subcommand ! git gc --quiet --no-detach --skip-maintenance-before-detach <run-commit-graph.txt &&
> +    test_subcommand git gc --quiet --no-detach --skip-maintenance-before-detach <run-gc.txt &&
> +    test_subcommand git gc --quiet --no-detach --skip-maintenance-before-detach <run-both.txt &&
>   test_subcommand git commit-graph write --split --reachable --no-progress <run-commit-graph.txt &&
>   test_subcommand ! git commit-graph write --split --reachable --no-progress <run-gc.txt &&
>   test_subcommand git commit-graph write --split --reachable --no-progress <run-both.txt
> 
> --
> 2.49.0.1266.g31b7d2e469.dirty

^ permalink raw reply	[flat|nested] 6+ messages in thread
* [PATCH 00/11] builtin/maintenance: fix ref lock races when detaching
@ 2025-05-27 14:04 Patrick Steinhardt
  2025-05-27 14:04 ` [PATCH 11/11] builtin/maintenance: fix locking race when handling "gc" task Patrick Steinhardt
  0 siblings, 1 reply; 6+ messages in thread
From: Patrick Steinhardt @ 2025-05-27 14:04 UTC (permalink / raw)
  To: git; +Cc: Yonatan Roth, david asraf

Hi,

this patch series fixes races around locking the "packed-refs" file when
auto-maintenance decides to repack it. This issue has been reported e.g.
via [1] and [2].

The root cause is that git-gc(1) used to know to detach _after_ having
repacked references. As such, callers wouldn't continue with their thing
until we have already packed refs, and thus the race does not exist
there. git-maintenance(1) didn't have the same split though, so this
patch series retrofits that logic.

The series is structured as follows:

  - Patches 1 and 2 do some light refactorings.

  - Patches 3 to 5 refactor how we set up the list of tasks to not rely
    on globals anymore. Instead, we now have a single source of truth
    for which tasks exactly will be run.

  - The remaining patches introduce the split of before/after-detach
    tasks and wire them up for "pack-refs", "reflog-expire" and "gc"
    tasks.

Thanks!

Patrick

[1]: <CAJR-fbZ4X1+gN75m2dUvocR6NkowLOZ9F26cjBy8w1qd181OoQ@mail.gmail.com>
[2]: <CANi7bVAkNc+gY1NoXfJuDRjxjZLTgL8Lfn8_ZmWsvLAoiLPkNg@mail.gmail.com>

---
Patrick Steinhardt (11):
      builtin/gc: use designated field initializers for maintenance tasks
      builtin/gc: drop redundant local variable
      builtin/maintenance: centralize configuration of explicit tasks
      builtin/maintenance: mark "--task=" and "--schedule=" as incompatible
      builtin/maintenance: stop modifying global array of tasks
      builtin/maintenance: extract function to run tasks
      builtin/maintenance: fix typedef for function pointers
      builtin/maintenance: let tasks do maintenance before and after detach
      builtin/maintenance: fix locking race when packing refs and reflogs
      builtin/gc: avoid global state in `gc_before_repack()`
      builtin/maintenance: fix locking race when handling "gc" task

 builtin/gc.c           | 386 +++++++++++++++++++++++++++----------------------
 t/t7900-maintenance.sh |  19 ++-
 2 files changed, 229 insertions(+), 176 deletions(-)


---
base-commit: 845c48a16a7f7b2c44d8cb137b16a4a1f0140229
change-id: 20250527-b4-pks-maintenance-ref-lock-race-11ae5d68e06f


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-05-30 20:46 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-30 12:55 [PATCH 11/11] builtin/maintenance: fix locking race when handling "gc" task Ben Knoble
2025-05-30 14:05 ` Patrick Steinhardt
2025-05-30 15:10   ` Ben Knoble
2025-05-30 15:24     ` Patrick Steinhardt
2025-05-30 20:46       ` Ben Knoble
  -- strict thread matches above, loose matches on Subject: below --
2025-05-27 14:04 [PATCH 00/11] builtin/maintenance: fix ref lock races when detaching Patrick Steinhardt
2025-05-27 14:04 ` [PATCH 11/11] builtin/maintenance: fix locking race when handling "gc" task Patrick Steinhardt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).