All of lore.kernel.org
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
Cc: git@vger.kernel.org,  Derrick Stolee <stolee@gmail.com>
Subject: Re: [PATCH 2/2] restore: avoid sparse index expansion
Date: Mon, 25 May 2026 08:05:30 +0900	[thread overview]
Message-ID: <xmqqtsrwh0hx.fsf@gitster.g> (raw)
In-Reply-To: <47542cbd42eb13b63d0d852fb2f5bf967952b318.1779644412.git.gitgitgadget@gmail.com> (Derrick Stolee via GitGitGadget's message of "Sun, 24 May 2026 17:40:12 +0000")

"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Derrick Stolee <stolee@gmail.com>
>
> Teach update_some() to handle sparse directory entries at the tree
> level rather than expanding the entire sparse index. When iterating a
> source tree during checkout/restore operations:
>
>  - If a directory matches a sparse directory entry with the same OID,
>    skip it entirely (no change needed).
>
>  - If the OID differs and we are in non-overlay mode (e.g., restore
>    --staged), update the sparse directory entry's OID in place. This
>    is semantically correct because non-overlay mode removes paths not
>    in the source tree anyway.
>
>  - In overlay mode (e.g., checkout <tree> -- .), fall through to
>    recursive descent so individual file entries are preserved
>    correctly.
>
> Also switch from index_name_pos() to index_name_pos_sparse() for
> individual file lookups to avoid triggering ensure_full_index() when
> the file is already individually tracked in the index.
>
> Update the test expectation in t1092 to assert that 'restore --staged'
> no longer expands the sparse index.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
>  builtin/checkout.c                       | 57 +++++++++++++++++++++---
>  t/t1092-sparse-checkout-compatibility.sh |  8 ++--
>  2 files changed, 55 insertions(+), 10 deletions(-)
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index 1345e8574a..67f03dea10 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -31,6 +31,7 @@
>  #include "revision.h"
>  #include "sequencer.h"
>  #include "setup.h"
> +#include "sparse-index.h"
>  #include "strvec.h"
>  #include "submodule.h"
>  #include "symlinks.h"
> @@ -142,14 +143,56 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
>  }
>  
>  static int update_some(const struct object_id *oid, struct strbuf *base,
> -		       const char *pathname, unsigned mode, void *context UNUSED)
> +		       const char *pathname, unsigned mode, void *context)
>  {
>  	int len;
>  	struct cache_entry *ce;
>  	int pos;
> +	int overlay_mode = context ? *((int *)context) : 1;
>  
> -	if (S_ISDIR(mode))
> +	if (S_ISDIR(mode)) {
> +		/*
> +		 * If this directory exists as a sparse directory entry in
> +		 * the index, we can handle it at the tree level without
> +		 * descending into individual files.
> +		 */
> +		if (the_repository->index->sparse_index) {

I wonder if this deep nesting is a sign that the newly added code
from here to ...

> +			struct strbuf dirpath = STRBUF_INIT;
> +
> +			strbuf_addbuf(&dirpath, base);
> +			strbuf_addstr(&dirpath, pathname);
> +			strbuf_addch(&dirpath, '/');
> +
> +			pos = index_name_pos_sparse(the_repository->index,
> +						    dirpath.buf, dirpath.len);
> +			if (pos >= 0) {
> +				struct cache_entry *old =
> +					the_repository->index->cache[pos];
> +				if (S_ISSPARSEDIR(old->ce_mode)) {
> +					if (oideq(oid, &old->oid)) {
> +						strbuf_release(&dirpath);
> +						return 0;
> +					}
> +					if (!overlay_mode) {
> +						/*
> +						 * In non-overlay mode (e.g.,
> +						 * restore --staged), we can
> +						 * replace the sparse dir OID
> +						 * directly since files not in
> +						 * the source tree should be
> +						 * removed anyway.
> +						 */
> +						oidcpy(&old->oid, oid);
> +						old->ce_flags |= CE_UPDATE;
> +						strbuf_release(&dirpath);
> +						return 0;
> +					}
> +				}
> +			}
> +			strbuf_release(&dirpath);
> +		}

... here may become easier to understand if it is made into a small
helper function with a descriptive name.

>  		return READ_TREE_RECURSIVE;
> +	}
>  
>  	len = base->len + strlen(pathname);
>  	ce = make_empty_cache_entry(the_repository->index, len);
> @@ -165,7 +208,7 @@ static int update_some(const struct object_id *oid, struct strbuf *base,
>  	 * entry in place. Whether it is UPTODATE or not, checkout_entry will
>  	 * do the right thing.
>  	 */
> -	pos = index_name_pos(the_repository->index, ce->name, ce->ce_namelen);
> +	pos = index_name_pos_sparse(the_repository->index, ce->name, ce->ce_namelen);
>  	if (pos >= 0) {
>  		struct cache_entry *old = the_repository->index->cache[pos];
>  		if (ce->ce_mode == old->ce_mode &&
> @@ -182,10 +225,11 @@ static int update_some(const struct object_id *oid, struct strbuf *base,
>  	return 0;
>  }
>  
> -static int read_tree_some(struct tree *tree, const struct pathspec *pathspec)
> +static int read_tree_some(struct tree *tree, const struct pathspec *pathspec,
> +			  int overlay_mode)
>  {
>  	read_tree(the_repository, tree,
> -		  pathspec, update_some, NULL);
> +		  pathspec, update_some, &overlay_mode);
>  
>  	/* update the index with the given tree's info
>  	 * for all args, expanding wildcards, and exit
> @@ -580,7 +624,8 @@ static int checkout_paths(const struct checkout_opts *opts,
>  		return error(_("index file corrupt"));
>  
>  	if (opts->source_tree)
> -		read_tree_some(opts->source_tree, &opts->pathspec);
> +		read_tree_some(opts->source_tree, &opts->pathspec,
> +			       opts->overlay_mode);
>  	if (opts->merge)
>  		unmerge_index(the_repository->index, &opts->pathspec, CE_MATCHED);
>  
> diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
> index d69434e7ab..8186da5c88 100755
> --- a/t/t1092-sparse-checkout-compatibility.sh
> +++ b/t/t1092-sparse-checkout-compatibility.sh
> @@ -2608,19 +2608,19 @@ test_expect_success 'restore --staged with wildcards' '
>  	test_all_match git diff --cached
>  '
>  
> -test_expect_success 'sparse-index is expanded: restore --staged' '
> +test_expect_success 'sparse-index is not expanded: restore --staged' '
>  	init_repos &&
>  
>  	git -C sparse-index checkout -b restore-staged-exp base &&
>  	git -C sparse-index reset --soft update-folder1 &&
> -	ensure_expanded restore --staged .
> +	ensure_not_expanded restore --staged .
>  '
>  
> -test_expect_success 'sparse-index is expanded: restore --source --staged' '
> +test_expect_success 'sparse-index is not expanded: restore --source --staged' '
>  	init_repos &&
>  
>  	git -C sparse-index checkout -b restore-source-staged base &&
> -	ensure_expanded restore --source update-folder1 --staged .
> +	ensure_not_expanded restore --source update-folder1 --staged .
>  '

Very nice.

  reply	other threads:[~2026-05-24 23:05 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-24 17:40 [PATCH 0/2] restore: better integrate with sparse index Derrick Stolee via GitGitGadget
2026-05-24 17:40 ` [PATCH 1/2] t1092: test 'git restore' " Derrick Stolee via GitGitGadget
2026-05-24 22:51   ` Junio C Hamano
2026-05-24 17:40 ` [PATCH 2/2] restore: avoid sparse index expansion Derrick Stolee via GitGitGadget
2026-05-24 23:05   ` Junio C Hamano [this message]
2026-05-26  2:54     ` Derrick Stolee
2026-05-26 20:26 ` [PATCH v2 0/2] restore: better integrate with sparse index Derrick Stolee via GitGitGadget
2026-05-26 20:26   ` [PATCH v2 1/2] t1092: test 'git restore' " Derrick Stolee via GitGitGadget
2026-05-26 20:26   ` [PATCH v2 2/2] restore: avoid sparse index expansion Derrick Stolee via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqtsrwh0hx.fsf@gitster.g \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=gitgitgadget@gmail.com \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.