git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Calvin Wan <calvinwan@google.com>
To: Han Young <hanyang.tony@bytedance.com>
Cc: git@vger.kernel.org, jonathantanmy@google.com,
	sokcevic@google.com,  gitster@pobox.com,
	phillip.wood123@gmail.com
Subject: Re: [PATCH v2 1/3] repack: pack everything into packfile
Date: Tue, 8 Oct 2024 14:41:04 -0700	[thread overview]
Message-ID: <CAFySSZDjbWdYKRSdSkpd8XzxwOskZ-8tp0tZWxrYBgC6eCED4Q@mail.gmail.com> (raw)
In-Reply-To: <20241008081350.8950-2-hanyang.tony@bytedance.com>

On Tue, Oct 8, 2024 at 1:14 AM Han Young <hanyang.tony@bytedance.com> wrote:
>
> In a partial repository, creating a local commit and then fetching
> causes the following state to occur:
>
> commit  tree  blob
>  C3 ---- T3 -- B3 (fetched from remote, in promisor pack)
>  |
>  C2 ---- T2 -- B2 (created locally, in non-promisor pack)
>  |
>  C1 ---- T1 -- B1 (fetched from remote, in promisor pack)
>
> During garbage collection, parents of promisor objects are marked as
> UNINTERESTING and are subsequently garbage collected. In this case, C2
> would be deleted and attempts to access that commit would result in "bad
> object" errors (originally reported here[1]).
>
> For partial repos, repacking is done in two steps. We first repack all the
> objects in promisor packfile, then repack all the non-promisor objects.
> It turns out C2, T2 and B2 are not repacked in either steps, ended up deleted.
> We can avoid this by packing everything into a promisor packfile, if the repo
> is partial cloned.
>
> [1] https://lore.kernel.org/git/20240802073143.56731-1-hanyang.tony@bytedance.com/
>
> Helped-by: Calvin Wan <calvinwan@google.com>
> Signed-off-by: Han Young <hanyang.tony@bytedance.com>
> ---
>  builtin/repack.c | 257 ++++++++++++++++++++++++++---------------------
>  1 file changed, 143 insertions(+), 114 deletions(-)
>
> diff --git a/builtin/repack.c b/builtin/repack.c
> index cb4420f085..e9e18a31fe 100644
> --- a/builtin/repack.c
> +++ b/builtin/repack.c
> @@ -321,6 +321,23 @@ static int write_oid(const struct object_id *oid,
>         return 0;
>  }
>
> +static int write_loose_oid(const struct object_id *oid,
> +                                const char *path UNUSED,
> +                                void *data)
> +{
> +       struct child_process *cmd = data;
> +
> +       if (cmd->in == -1) {
> +               if (start_command(cmd))
> +                       die(_("could not start pack-objects to repack promisor objects"));
> +       }
> +
> +       if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
> +           write_in_full(cmd->in, "\n", 1) < 0)
> +               die(_("failed to feed promisor objects to pack-objects"));
> +       return 0;
> +}
> +
>  static struct {
>         const char *name;
>         unsigned optional:1;
> @@ -370,12 +387,15 @@ static int has_pack_ext(const struct generated_pack_data *data,
>         BUG("unknown pack extension: '%s'", ext);
>  }
>
> -static void repack_promisor_objects(const struct pack_objects_args *args,
> -                                   struct string_list *names)
> +static int repack_promisor_objects(const struct pack_objects_args *args,
> +                                   struct string_list *names,
> +                                   struct string_list *list,
> +                                   int pack_all)
>  {
>         struct child_process cmd = CHILD_PROCESS_INIT;
>         FILE *out;
>         struct strbuf line = STRBUF_INIT;
> +       struct string_list_item *item;
>
>         prepare_pack_objects(&cmd, args, packtmp);
>         cmd.in = -1;
> @@ -387,13 +407,19 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
>          * {type -> existing pack order} ordering when computing deltas instead
>          * of a {type -> size} ordering, which may produce better deltas.
>          */
> -       for_each_packed_object(write_oid, &cmd,
> -                              FOR_EACH_OBJECT_PROMISOR_ONLY);
> +       if (pack_all)
> +               for_each_packed_object(write_oid, &cmd, 0);
> +       else
> +               for_each_string_list_item(item, list) {
> +                       pack_mark_retained(item);
> +               }
> +
> +       for_each_loose_object(write_loose_oid, &cmd, 0);
>
>         if (cmd.in == -1) {
>                 /* No packed objects; cmd was never started */
>                 child_process_clear(&cmd);
> -               return;
> +               return 0;
>         }
>
>         close(cmd.in);
> @@ -431,6 +457,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
>         if (finish_command(&cmd))
>                 die(_("could not finish pack-objects to repack promisor objects"));
>         strbuf_release(&line);
> +       return 0;
>  }
>
>  struct pack_geometry {
> @@ -1312,8 +1339,7 @@ int cmd_repack(int argc,
>                 strvec_push(&cmd.args, "--reflog");
>                 strvec_push(&cmd.args, "--indexed-objects");
>         }
> -       if (repo_has_promisor_remote(the_repository))
> -               strvec_push(&cmd.args, "--exclude-promisor-objects");
> +
>         if (!write_midx) {
>                 if (write_bitmaps > 0)
>                         strvec_push(&cmd.args, "--write-bitmap-index");
> @@ -1323,125 +1349,128 @@ int cmd_repack(int argc,
>         if (use_delta_islands)
>                 strvec_push(&cmd.args, "--delta-islands");
>
> -       if (pack_everything & ALL_INTO_ONE) {
> -               repack_promisor_objects(&po_args, &names);
> -
> -               if (has_existing_non_kept_packs(&existing) &&
> -                   delete_redundant &&
> -                   !(pack_everything & PACK_CRUFT)) {
> -                       for_each_string_list_item(item, &names) {
> -                               strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack",
> -                                            packtmp_name, item->string);
> -                       }
> -                       if (unpack_unreachable) {
> -                               strvec_pushf(&cmd.args,
> -                                            "--unpack-unreachable=%s",
> -                                            unpack_unreachable);
> -                       } else if (pack_everything & LOOSEN_UNREACHABLE) {
> -                               strvec_push(&cmd.args,
> -                                           "--unpack-unreachable");
> -                       } else if (keep_unreachable) {
> -                               strvec_push(&cmd.args, "--keep-unreachable");
> -                               strvec_push(&cmd.args, "--pack-loose-unreachable");
> +       if (repo_has_promisor_remote(the_repository)) {
> +               ret = repack_promisor_objects(&po_args, &names,
> +                       &existing.non_kept_packs, pack_everything & ALL_INTO_ONE);

Using a goto jump would be easier to both read your patch and
remove the need to indent this entire code block.

  reply	other threads:[~2024-10-08 21:41 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-02  7:31 [PATCH 0/1] revision: fix reachable objects being gc'ed in no blob clone repo Han Young
2024-08-02  7:31 ` [PATCH 1/1] revision: don't set parents as uninteresting if exclude promisor objects Han Young
2024-08-02 16:45   ` Junio C Hamano
2024-08-12 12:34     ` [External] " 韩仰
2024-08-12 16:09       ` Junio C Hamano
2024-08-22  8:28         ` 韩仰
2024-08-13  0:45 ` [PATCH 0/1] revision: fix reachable objects being gc'ed in no blob clone repo Jonathan Tan
2024-08-13 17:18   ` Jonathan Tan
2024-08-14  4:10     ` Junio C Hamano
2024-08-14 19:30       ` Jonathan Tan
2024-08-23 12:43 ` [WIP v2 0/4] " Han Young
2024-08-23 12:43   ` [WIP v2 1/4] packfile: split promisor objects oidset into two Han Young
2024-08-23 12:43   ` [WIP v2 2/4] revision: add exclude-promisor-pack-objects option Han Young
2024-08-23 12:43   ` [WIP v2 3/4] revision: don't mark commit as UNINTERESTING if --exclude-promisor-objects is set Han Young
2024-08-23 12:43   ` [WIP v2 4/4] repack: use new exclude promisor pack objects option Han Young
2024-09-19 23:47 ` [PATCH 0/2] revision: fix reachable commits being gc'ed in partial repo Calvin Wan
2024-09-19 23:47 ` [PATCH 1/2] packfile: split promisor objects oidset into two Calvin Wan
2024-09-22  6:37   ` Junio C Hamano
2024-09-19 23:47 ` [PATCH 2/2] fetch-pack.c: do not declare local commits as "have" in partial repos Calvin Wan
2024-09-22  6:53   ` Junio C Hamano
2024-09-22 16:41     ` Junio C Hamano
2024-09-23  3:44     ` [External] " 韩仰
2024-09-23 16:21       ` Junio C Hamano
2024-10-02 22:35       ` Calvin Wan
2024-09-25  7:20 ` [PATCH 0/2] repack: pack everything into promisor packfile " Han Young
2024-09-25  7:20   ` [PATCH 1/2] repack: pack everything into packfile Han Young
2024-09-25  7:20   ` [PATCH 2/2] t0410: adapt tests to repack changes Han Young
2024-09-25 15:20   ` [PATCH 0/2] repack: pack everything into promisor packfile in partial repos Phillip Wood
2024-09-25 16:48     ` Junio C Hamano
2024-09-25 17:03   ` Junio C Hamano
2024-10-01 19:17 ` Missing Promisor Objects in Partial Repo Design Doc Calvin Wan
2024-10-01 19:35   ` Junio C Hamano
2024-10-02  2:54   ` Junio C Hamano
2024-10-02  7:57     ` [External] " Han Young
2024-10-08 21:35     ` Calvin Wan
2024-10-09  6:46       ` [External] " Han Young
2024-10-09 18:34         ` Jonathan Tan
2024-10-12  2:05           ` Jonathan Tan
2024-10-12  3:30             ` Han Young
2024-10-14 17:52               ` Jonathan Tan
2024-10-09 18:53     ` Jonathan Tan
2024-10-08  8:13 ` [PATCH v2 0/3] repack: pack everything into promisor packfile in partial repos Han Young
2024-10-08  8:13   ` [PATCH v2 1/3] repack: pack everything into packfile Han Young
2024-10-08 21:41     ` Calvin Wan [this message]
2024-10-08  8:13   ` [PATCH v2 2/3] t0410: adapt tests to repack changes Han Young
2024-10-08  8:13   ` [PATCH v2 3/3] partial-clone: update doc Han Young
2024-10-08 21:57   ` [PATCH v2 0/3] repack: pack everything into promisor packfile in partial repos Junio C Hamano
2024-10-08 22:43     ` Junio C Hamano
2024-10-09  6:31     ` [External] " Han Young
2024-10-11  8:24 ` [PATCH v3 " Han Young
2024-10-11  8:24   ` [PATCH v3 1/3] repack: pack everything into packfile Han Young
2024-10-11  8:24   ` [PATCH v3 2/3] repack: adapt tests to repack changes Han Young
2024-10-11  8:24   ` [PATCH v3 3/3] partial-clone: update doc Han Young
2024-10-11 18:18   ` [PATCH v3 0/3] repack: pack everything into promisor packfile in partial repos Junio C Hamano
2024-10-11 18:23     ` Junio C Hamano
2024-10-14  3:25 ` [PATCH v4 " Han Young
2024-10-14  3:25   ` [PATCH v4 1/3] repack: pack everything into packfile Han Young
2024-10-14  3:25   ` [PATCH v4 2/3] t0410: adapt tests to repack changes Han Young
2024-10-14  3:25   ` [PATCH v4 3/3] partial-clone: update doc Han Young
2024-10-21 22:29   ` [WIP 0/3] Repack on fetch Jonathan Tan
2024-10-21 22:29     ` [WIP 1/3] move variable Jonathan Tan
2024-10-21 22:29     ` [WIP 2/3] pack-objects Jonathan Tan
2024-10-21 22:29     ` [WIP 3/3] record local links and call pack-objects Jonathan Tan
2024-10-23  7:00     ` [External] [WIP 0/3] Repack on fetch Han Young
2024-10-23 17:03       ` Jonathan Tan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAFySSZDjbWdYKRSdSkpd8XzxwOskZ-8tp0tZWxrYBgC6eCED4Q@mail.gmail.com \
    --to=calvinwan@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=hanyang.tony@bytedance.com \
    --cc=jonathantanmy@google.com \
    --cc=phillip.wood123@gmail.com \
    --cc=sokcevic@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).