Git development
 help / color / mirror / Atom feed
* [PATCH] stash: reuse cached index entries in --patch temporary index
@ 2026-05-19 12:43 Adam Johnson via GitGitGadget
  2026-05-20  2:08 ` Junio C Hamano
  2026-05-20  2:26 ` Junio C Hamano
  0 siblings, 2 replies; 3+ messages in thread
From: Adam Johnson via GitGitGadget @ 2026-05-19 12:43 UTC (permalink / raw)
  To: git
  Cc: Thomas Gummerer, Elijah Newren, Phillip Wood, Victoria Dye,
	Adam Johnson, Adam Johnson

From: Adam Johnson <me@adamj.eu>

`git stash -p` prepares the interactive selection by creating a
temporary index at HEAD, switching `GIT_INDEX_FILE` to it, and then
running the `add -p` machinery.

That temporary index was created by running `git read-tree HEAD`.  The
resulting index had no useful cached stat data or fsmonitor-valid bits
from the real index.  When `run_add_p()` refreshed that temporary index
before showing the first prompt, it could end up lstat(2)-ing every
tracked file, even in a repository where `git diff` and `git restore -p`
can use fsmonitor to avoid that work.

Create the temporary index in-process instead.  Use `unpack_trees()` to
reset the real index contents to HEAD while writing the result to the
temporary index path.  For paths whose index entries already match HEAD,
`oneway_merge()` reuses the existing cache entries, preserving their
cached stat data and `CE_FSMONITOR_VALID` state.

This makes the refresh performed by `run_add_p()` behave like the one
used by `git restore -p`: unchanged paths can be skipped via fsmonitor
instead of being scanned again.

In a 206k file repository with `core.fsmonitor` enabled and a one-line
change in one file, time to first prompt dropped from 34.774 seconds to
0.659 seconds.

Signed-off-by: Adam Johnson <me@adamj.eu>
---
    stash: reuse cached index entries in --patch temporary index

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-2306%2Fadamchainz%2Faj%2Foptimize-stash-patch-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-2306/adamchainz/aj/optimize-stash-patch-v1
Pull-Request: https://github.com/git/git/pull/2306

 builtin/stash.c        | 71 ++++++++++++++++++++++++++++++++++++++----
 t/t3904-stash-patch.sh | 18 +++++++++++
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/builtin/stash.c b/builtin/stash.c
index 32dbc97b47..48189cb9f7 100644
--- a/builtin/stash.c
+++ b/builtin/stash.c
@@ -372,6 +372,57 @@ static int reset_tree(struct object_id *i_tree, int update, int reset)
 	return 0;
 }
 
+static int create_index_from_tree(const struct object_id *tree_id,
+				  const char *index_path)
+{
+	int nr_trees = 1;
+	int ret = 0;
+	struct unpack_trees_options opts;
+	struct tree_desc t[MAX_UNPACK_TREES];
+	struct tree *tree;
+	struct index_state dst_istate = INDEX_STATE_INIT(the_repository);
+	struct lock_file lock_file = LOCK_INIT;
+
+	repo_read_index_preload(the_repository, NULL, 0);
+	if (refresh_index(the_repository->index, REFRESH_QUIET, NULL, NULL, NULL))
+		return -1;
+
+	hold_lock_file_for_update(&lock_file, index_path, LOCK_DIE_ON_ERROR);
+
+	memset(&opts, 0, sizeof(opts));
+
+	tree = repo_parse_tree_indirect(the_repository, tree_id);
+	if (!tree || repo_parse_tree(the_repository, tree)) {
+		ret = -1;
+		goto done;
+	}
+
+	init_tree_desc(t, &tree->object.oid, tree->buffer, tree->size);
+
+	opts.head_idx = 1;
+	opts.src_index = the_repository->index;
+	opts.dst_index = &dst_istate;
+	opts.merge = 1;
+	opts.reset = UNPACK_RESET_PROTECT_UNTRACKED;
+	opts.fn = oneway_merge;
+
+	if (unpack_trees(nr_trees, t, &opts)) {
+		ret = -1;
+		goto done;
+	}
+
+	if (write_locked_index(&dst_istate, &lock_file, COMMIT_LOCK)) {
+		ret = error(_("unable to write new index file"));
+		goto done;
+	}
+
+done:
+	release_index(&dst_istate);
+	if (ret)
+		rollback_lock_file(&lock_file);
+	return ret;
+}
+
 static int diff_tree_binary(struct strbuf *out, struct object_id *w_commit)
 {
 	struct child_process cp = CHILD_PROCESS_INIT;
@@ -1321,18 +1372,26 @@ static int stash_patch(struct stash_info *info, const struct pathspec *ps,
 		       struct interactive_options *interactive_opts)
 {
 	int ret = 0;
-	struct child_process cp_read_tree = CHILD_PROCESS_INIT;
 	struct child_process cp_diff_tree = CHILD_PROCESS_INIT;
+	struct commit *head_commit;
+	const struct object_id *head_tree;
 	struct index_state istate = INDEX_STATE_INIT(the_repository);
 	char *old_index_env = NULL, *old_repo_index_file;
 
 	remove_path(stash_index_path.buf);
 
-	cp_read_tree.git_cmd = 1;
-	strvec_pushl(&cp_read_tree.args, "read-tree", "HEAD", NULL);
-	strvec_pushf(&cp_read_tree.env, "GIT_INDEX_FILE=%s",
-		     stash_index_path.buf);
-	if (run_command(&cp_read_tree)) {
+	head_commit = lookup_commit(the_repository, &info->b_commit);
+	if (!head_commit || repo_parse_commit(the_repository, head_commit)) {
+		ret = -1;
+		goto done;
+	}
+	head_tree = get_commit_tree_oid(head_commit);
+	if (!head_tree) {
+		ret = -1;
+		goto done;
+	}
+
+	if (create_index_from_tree(head_tree, stash_index_path.buf)) {
 		ret = -1;
 		goto done;
 	}
diff --git a/t/t3904-stash-patch.sh b/t/t3904-stash-patch.sh
index 90a4ff2c10..4b3241c8cd 100755
--- a/t/t3904-stash-patch.sh
+++ b/t/t3904-stash-patch.sh
@@ -84,6 +84,24 @@ test_expect_success 'none of this moved HEAD' '
 	verify_saved_head
 '
 
+test_expect_success 'stash -p with unmodified tracked files present' '
+	git reset --hard &&
+	echo line1 >alpha &&
+	echo line1 >beta &&
+	git add alpha beta &&
+	git commit -m "add alpha and beta" &&
+	echo line2 >>alpha &&
+	echo y | git stash -p &&
+	echo line1 >expect &&
+	test_cmp expect alpha &&
+	test_cmp expect beta &&
+	git stash pop &&
+	printf "line1\nline2\n" >expect &&
+	test_cmp expect alpha &&
+	echo line1 >expect &&
+	test_cmp expect beta
+'
+
 test_expect_success 'stash -p with split hunk' '
 	git reset --hard &&
 	cat >test <<-\EOF &&

base-commit: 7bcaabddcf68bd0702697da5904c3b68c52f94cf
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] stash: reuse cached index entries in --patch temporary index
  2026-05-19 12:43 [PATCH] stash: reuse cached index entries in --patch temporary index Adam Johnson via GitGitGadget
@ 2026-05-20  2:08 ` Junio C Hamano
  2026-05-20  2:26 ` Junio C Hamano
  1 sibling, 0 replies; 3+ messages in thread
From: Junio C Hamano @ 2026-05-20  2:08 UTC (permalink / raw)
  To: Adam Johnson via GitGitGadget
  Cc: git, Thomas Gummerer, Elijah Newren, Phillip Wood, Victoria Dye,
	Adam Johnson

"Adam Johnson via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Adam Johnson <me@adamj.eu>
>
> `git stash -p` prepares the interactive selection by creating a
> temporary index at HEAD, switching `GIT_INDEX_FILE` to it, and then
> running the `add -p` machinery.
>
> That temporary index was created by running `git read-tree HEAD`.  The
> resulting index had no useful cached stat data or fsmonitor-valid bits
> from the real index.  When `run_add_p()` refreshed that temporary index
> before showing the first prompt, it could end up lstat(2)-ing every
> tracked file, even in a repository where `git diff` and `git restore -p`
> can use fsmonitor to avoid that work.
>
> Create the temporary index in-process instead.  Use `unpack_trees()` to
> reset the real index contents to HEAD while writing the result to the
> temporary index path.  For paths whose index entries already match HEAD,
> `oneway_merge()` reuses the existing cache entries, preserving their
> cached stat data and `CE_FSMONITOR_VALID` state.

Clever.  As the fsmonitor_valid bit is in-core only, updating the
index in-process would be an obvious and probably the only sensible
way to preserve it.

I however have to wonder if simply replacing the external process
invocation with "git read-tree -m HEAD" (i.e., oneway merge) gives
a similar speed-up.

> This makes the refresh performed by `run_add_p()` behave like the one
> used by `git restore -p`: unchanged paths can be skipped via fsmonitor
> instead of being scanned again.
>
> In a 206k file repository with `core.fsmonitor` enabled and a one-line
> change in one file, time to first prompt dropped from 34.774 seconds to
> 0.659 seconds.

Interesting.

> diff --git a/t/t3904-stash-patch.sh b/t/t3904-stash-patch.sh
> index 90a4ff2c10..4b3241c8cd 100755
> --- a/t/t3904-stash-patch.sh
> +++ b/t/t3904-stash-patch.sh
> @@ -84,6 +84,24 @@ test_expect_success 'none of this moved HEAD' '
>  	verify_saved_head
>  '
>  
> +test_expect_success 'stash -p with unmodified tracked files present' '
> +	git reset --hard &&
> +	echo line1 >alpha &&
> +	echo line1 >beta &&
> +	git add alpha beta &&
> +	git commit -m "add alpha and beta" &&
> +	echo line2 >>alpha &&
> +	echo y | git stash -p &&
> +	echo line1 >expect &&
> +	test_cmp expect alpha &&
> +	test_cmp expect beta &&
> +	git stash pop &&
> +	printf "line1\nline2\n" >expect &&
> +	test_cmp expect alpha &&
> +	echo line1 >expect &&
> +	test_cmp expect beta
> +'

What I read from the proposed log message is that the change is
purely about performance and should not change any behaviour.  Why
do we need a new test in t/t3904?  I would not have surprised if we
saw a new test in t/perf/, though.

Thanks.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] stash: reuse cached index entries in --patch temporary index
  2026-05-19 12:43 [PATCH] stash: reuse cached index entries in --patch temporary index Adam Johnson via GitGitGadget
  2026-05-20  2:08 ` Junio C Hamano
@ 2026-05-20  2:26 ` Junio C Hamano
  1 sibling, 0 replies; 3+ messages in thread
From: Junio C Hamano @ 2026-05-20  2:26 UTC (permalink / raw)
  To: Adam Johnson via GitGitGadget
  Cc: git, Thomas Gummerer, Elijah Newren, Phillip Wood, Victoria Dye,
	Adam Johnson

"Adam Johnson via GitGitGadget" <gitgitgadget@gmail.com> writes:

>  2 files changed, 83 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/stash.c b/builtin/stash.c
> index 32dbc97b47..48189cb9f7 100644
> --- a/builtin/stash.c
> +++ b/builtin/stash.c
> @@ -372,6 +372,57 @@ static int reset_tree(struct object_id *i_tree, int update, int reset)
>  	return 0;
>  }
>  
> +static int create_index_from_tree(const struct object_id *tree_id,
> +				  const char *index_path)
> +{
> +	int nr_trees = 1;
> +	int ret = 0;
> +	struct unpack_trees_options opts;
> +	struct tree_desc t[MAX_UNPACK_TREES];
> +	struct tree *tree;
> +	struct index_state dst_istate = INDEX_STATE_INIT(the_repository);
> +	struct lock_file lock_file = LOCK_INIT;
> +
> +	repo_read_index_preload(the_repository, NULL, 0);
> +	if (refresh_index(the_repository->index, REFRESH_QUIET, NULL, NULL, NULL))
> +		return -1;

Is this "non-zero return from refresh_index() leads to a failure"
intended?  The old "git read-tree HEAD" wouldn't have cared if the
original index were unmerged, for example, but with this update, we
will see an immediate failure.  There are other conditions that
refresh_index() flips its local variable has_errors on, which leads
to its non-zero return.

Since "git stash -p" is almost always invoked when the user has
unstaged modifications, I am not sure allowing refresh_index() to
notice and barf is what we want here.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-05-20  2:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-19 12:43 [PATCH] stash: reuse cached index entries in --patch temporary index Adam Johnson via GitGitGadget
2026-05-20  2:08 ` Junio C Hamano
2026-05-20  2:26 ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox