* [PATCH v2 1/8] sparse-checkout: remove use of the_repository
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-07-17 1:34 ` [PATCH v2 2/8] sparse-checkout: add basics of 'clean' command Derrick Stolee via GitGitGadget
` (8 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The logic for the 'git sparse-checkout' builtin uses the_repository all
over the place, despite some use of a repository struct in different
method parameters. Complete this removal of the_repository by using
'repo' when possible.
In one place, there was already a local variable 'r' that was set to
the_repository, so move that to a method parameter.
We cannot remove the USE_THE_REPOSITORY_VARIABLE declaration as we are
still using global constants for the state of the sparse-checkout.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/sparse-checkout.c | 121 ++++++++++++++++++++------------------
1 file changed, 64 insertions(+), 57 deletions(-)
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 8a0ffba9d4b3..61714bf80be0 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -204,12 +204,12 @@ static void clean_tracked_sparse_directories(struct repository *r)
ensure_full_index(r->index);
}
-static int update_working_directory(struct pattern_list *pl)
+static int update_working_directory(struct repository *r,
+ struct pattern_list *pl)
{
enum update_sparsity_result result;
struct unpack_trees_options o;
struct lock_file lock_file = LOCK_INIT;
- struct repository *r = the_repository;
struct pattern_list *old_pl;
/* If no branch has been checked out, there are no updates to make. */
@@ -327,7 +327,8 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl)
string_list_clear(&sl, 0);
}
-static int write_patterns_and_update(struct pattern_list *pl)
+static int write_patterns_and_update(struct repository *repo,
+ struct pattern_list *pl)
{
char *sparse_filename;
FILE *fp;
@@ -336,15 +337,15 @@ static int write_patterns_and_update(struct pattern_list *pl)
sparse_filename = get_sparse_checkout_filename();
- if (safe_create_leading_directories(the_repository, sparse_filename))
+ if (safe_create_leading_directories(repo, sparse_filename))
die(_("failed to create directory for sparse-checkout file"));
hold_lock_file_for_update(&lk, sparse_filename, LOCK_DIE_ON_ERROR);
- result = update_working_directory(pl);
+ result = update_working_directory(repo, pl);
if (result) {
rollback_lock_file(&lk);
- update_working_directory(NULL);
+ update_working_directory(repo, NULL);
goto out;
}
@@ -372,25 +373,26 @@ enum sparse_checkout_mode {
MODE_CONE_PATTERNS = 2,
};
-static int set_config(enum sparse_checkout_mode mode)
+static int set_config(struct repository *repo,
+ enum sparse_checkout_mode mode)
{
/* Update to use worktree config, if not already. */
- if (init_worktree_config(the_repository)) {
+ if (init_worktree_config(repo)) {
error(_("failed to initialize worktree config"));
return 1;
}
- if (repo_config_set_worktree_gently(the_repository,
+ if (repo_config_set_worktree_gently(repo,
"core.sparseCheckout",
mode ? "true" : "false") ||
- repo_config_set_worktree_gently(the_repository,
+ repo_config_set_worktree_gently(repo,
"core.sparseCheckoutCone",
mode == MODE_CONE_PATTERNS ?
"true" : "false"))
return 1;
if (mode == MODE_NO_PATTERNS)
- return set_sparse_index_config(the_repository, 0);
+ return set_sparse_index_config(repo, 0);
return 0;
}
@@ -410,7 +412,7 @@ static enum sparse_checkout_mode update_cone_mode(int *cone_mode) {
return MODE_ALL_PATTERNS;
}
-static int update_modes(int *cone_mode, int *sparse_index)
+static int update_modes(struct repository *repo, int *cone_mode, int *sparse_index)
{
int mode, record_mode;
@@ -418,20 +420,20 @@ static int update_modes(int *cone_mode, int *sparse_index)
record_mode = (*cone_mode != -1) || !the_repository->settings.sparse_checkout;
mode = update_cone_mode(cone_mode);
- if (record_mode && set_config(mode))
+ if (record_mode && set_config(repo, mode))
return 1;
/* Set sparse-index/non-sparse-index mode if specified */
if (*sparse_index >= 0) {
- if (set_sparse_index_config(the_repository, *sparse_index) < 0)
+ if (set_sparse_index_config(repo, *sparse_index) < 0)
die(_("failed to modify sparse-index config"));
/* force an index rewrite */
- repo_read_index(the_repository);
- the_repository->index->updated_workdir = 1;
+ repo_read_index(repo);
+ repo->index->updated_workdir = 1;
if (!*sparse_index)
- ensure_full_index(the_repository->index);
+ ensure_full_index(repo->index);
}
return 0;
@@ -448,7 +450,7 @@ static struct sparse_checkout_init_opts {
} init_opts;
static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
struct pattern_list pl;
char *sparse_filename;
@@ -464,7 +466,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
};
setup_work_tree();
- repo_read_index(the_repository);
+ repo_read_index(repo);
init_opts.cone_mode = -1;
init_opts.sparse_index = -1;
@@ -473,7 +475,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
builtin_sparse_checkout_init_options,
builtin_sparse_checkout_init_usage, 0);
- if (update_modes(&init_opts.cone_mode, &init_opts.sparse_index))
+ if (update_modes(repo, &init_opts.cone_mode, &init_opts.sparse_index))
return 1;
memset(&pl, 0, sizeof(pl));
@@ -485,14 +487,14 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
if (res >= 0) {
free(sparse_filename);
clear_pattern_list(&pl);
- return update_working_directory(NULL);
+ return update_working_directory(repo, NULL);
}
- if (repo_get_oid(the_repository, "HEAD", &oid)) {
+ if (repo_get_oid(repo, "HEAD", &oid)) {
FILE *fp;
/* assume we are in a fresh repo, but update the sparse-checkout file */
- if (safe_create_leading_directories(the_repository, sparse_filename))
+ if (safe_create_leading_directories(repo, sparse_filename))
die(_("unable to create leading directories of %s"),
sparse_filename);
fp = xfopen(sparse_filename, "w");
@@ -511,7 +513,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
add_pattern("!/*/", empty_base, 0, &pl, 0);
pl.use_cone_patterns = init_opts.cone_mode;
- return write_patterns_and_update(&pl);
+ return write_patterns_and_update(repo, &pl);
}
static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path)
@@ -674,7 +676,8 @@ static void add_patterns_literal(int argc, const char **argv,
add_patterns_from_input(pl, argc, argv, use_stdin ? stdin : NULL);
}
-static int modify_pattern_list(struct strvec *args, int use_stdin,
+static int modify_pattern_list(struct repository *repo,
+ struct strvec *args, int use_stdin,
enum modify_type m)
{
int result;
@@ -695,23 +698,24 @@ static int modify_pattern_list(struct strvec *args, int use_stdin,
break;
}
- if (!the_repository->settings.sparse_checkout) {
- set_config(MODE_ALL_PATTERNS);
- the_repository->settings.sparse_checkout = 1;
+ if (!repo->settings.sparse_checkout) {
+ set_config(repo, MODE_ALL_PATTERNS);
+ repo->settings.sparse_checkout = 1;
changed_config = 1;
}
- result = write_patterns_and_update(pl);
+ result = write_patterns_and_update(repo, pl);
if (result && changed_config)
- set_config(MODE_NO_PATTERNS);
+ set_config(repo, MODE_NO_PATTERNS);
clear_pattern_list(pl);
free(pl);
return result;
}
-static void sanitize_paths(struct strvec *args,
+static void sanitize_paths(struct repository *repo,
+ struct strvec *args,
const char *prefix, int skip_checks)
{
int i;
@@ -752,7 +756,7 @@ static void sanitize_paths(struct strvec *args,
for (i = 0; i < args->nr; i++) {
struct cache_entry *ce;
- struct index_state *index = the_repository->index;
+ struct index_state *index = repo->index;
int pos = index_name_pos(index, args->v[i], strlen(args->v[i]));
if (pos < 0)
@@ -779,7 +783,7 @@ static struct sparse_checkout_add_opts {
} add_opts;
static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_add_options[] = {
OPT_BOOL_F(0, "skip-checks", &add_opts.skip_checks,
@@ -796,7 +800,7 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
if (!the_repository->settings.sparse_checkout)
die(_("no sparse-checkout to add to"));
- repo_read_index(the_repository);
+ repo_read_index(repo);
argc = parse_options(argc, argv, prefix,
builtin_sparse_checkout_add_options,
@@ -804,9 +808,9 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
for (int i = 0; i < argc; i++)
strvec_push(&patterns, argv[i]);
- sanitize_paths(&patterns, prefix, add_opts.skip_checks);
+ sanitize_paths(repo, &patterns, prefix, add_opts.skip_checks);
- ret = modify_pattern_list(&patterns, add_opts.use_stdin, ADD);
+ ret = modify_pattern_list(repo, &patterns, add_opts.use_stdin, ADD);
strvec_clear(&patterns);
return ret;
@@ -825,7 +829,7 @@ static struct sparse_checkout_set_opts {
} set_opts;
static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
int default_patterns_nr = 2;
const char *default_patterns[] = {"/*", "!/*/", NULL};
@@ -847,7 +851,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
int ret;
setup_work_tree();
- repo_read_index(the_repository);
+ repo_read_index(repo);
set_opts.cone_mode = -1;
set_opts.sparse_index = -1;
@@ -856,7 +860,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
builtin_sparse_checkout_set_options,
builtin_sparse_checkout_set_usage, 0);
- if (update_modes(&set_opts.cone_mode, &set_opts.sparse_index))
+ if (update_modes(repo, &set_opts.cone_mode, &set_opts.sparse_index))
return 1;
/*
@@ -870,10 +874,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
} else {
for (int i = 0; i < argc; i++)
strvec_push(&patterns, argv[i]);
- sanitize_paths(&patterns, prefix, set_opts.skip_checks);
+ sanitize_paths(repo, &patterns, prefix, set_opts.skip_checks);
}
- ret = modify_pattern_list(&patterns, set_opts.use_stdin, REPLACE);
+ ret = modify_pattern_list(repo, &patterns, set_opts.use_stdin, REPLACE);
strvec_clear(&patterns);
return ret;
@@ -891,7 +895,7 @@ static struct sparse_checkout_reapply_opts {
static int sparse_checkout_reapply(int argc, const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_reapply_options[] = {
OPT_BOOL(0, "cone", &reapply_opts.cone_mode,
@@ -912,12 +916,12 @@ static int sparse_checkout_reapply(int argc, const char **argv,
builtin_sparse_checkout_reapply_options,
builtin_sparse_checkout_reapply_usage, 0);
- repo_read_index(the_repository);
+ repo_read_index(repo);
- if (update_modes(&reapply_opts.cone_mode, &reapply_opts.sparse_index))
+ if (update_modes(repo, &reapply_opts.cone_mode, &reapply_opts.sparse_index))
return 1;
- return update_working_directory(NULL);
+ return update_working_directory(repo, NULL);
}
static char const * const builtin_sparse_checkout_disable_usage[] = {
@@ -927,7 +931,7 @@ static char const * const builtin_sparse_checkout_disable_usage[] = {
static int sparse_checkout_disable(int argc, const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_disable_options[] = {
OPT_END(),
@@ -955,7 +959,7 @@ static int sparse_checkout_disable(int argc, const char **argv,
* are expecting to do that when disabling sparse-checkout.
*/
give_advice_on_expansion = 0;
- repo_read_index(the_repository);
+ repo_read_index(repo);
memset(&pl, 0, sizeof(pl));
hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0);
@@ -965,13 +969,13 @@ static int sparse_checkout_disable(int argc, const char **argv,
add_pattern("/*", empty_base, 0, &pl, 0);
- the_repository->settings.sparse_index = 0;
+ repo->settings.sparse_index = 0;
- if (update_working_directory(&pl))
+ if (update_working_directory(repo, &pl))
die(_("error while refreshing working directory"));
clear_pattern_list(&pl);
- return set_config(MODE_NO_PATTERNS);
+ return set_config(repo, MODE_NO_PATTERNS);
}
static char const * const builtin_sparse_checkout_check_rules_usage[] = {
@@ -986,14 +990,17 @@ static struct sparse_checkout_check_rules_opts {
char *rules_file;
} check_rules_opts;
-static int check_rules(struct pattern_list *pl, int null_terminated) {
+static int check_rules(struct repository *repo,
+ struct pattern_list *pl,
+ int null_terminated)
+{
struct strbuf line = STRBUF_INIT;
struct strbuf unquoted = STRBUF_INIT;
char *path;
int line_terminator = null_terminated ? 0 : '\n';
strbuf_getline_fn getline_fn = null_terminated ? strbuf_getline_nul
: strbuf_getline;
- the_repository->index->sparse_checkout_patterns = pl;
+ repo->index->sparse_checkout_patterns = pl;
while (!getline_fn(&line, stdin)) {
path = line.buf;
if (!null_terminated && line.buf[0] == '"') {
@@ -1005,7 +1012,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) {
path = unquoted.buf;
}
- if (path_in_sparse_checkout(path, the_repository->index))
+ if (path_in_sparse_checkout(path, repo->index))
write_name_quoted(path, stdout, line_terminator);
}
strbuf_release(&line);
@@ -1015,7 +1022,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) {
}
static int sparse_checkout_check_rules(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_check_rules_options[] = {
OPT_BOOL('z', NULL, &check_rules_opts.null_termination,
@@ -1054,7 +1061,7 @@ static int sparse_checkout_check_rules(int argc, const char **argv, const char *
free(sparse_filename);
}
- ret = check_rules(&pl, check_rules_opts.null_termination);
+ ret = check_rules(repo, &pl, check_rules_opts.null_termination);
clear_pattern_list(&pl);
free(check_rules_opts.rules_file);
return ret;
@@ -1083,8 +1090,8 @@ int cmd_sparse_checkout(int argc,
git_config(git_default_config, NULL);
- prepare_repo_settings(the_repository);
- the_repository->settings.command_requires_full_index = 0;
+ prepare_repo_settings(repo);
+ repo->settings.command_requires_full_index = 0;
return fn(argc, argv, prefix, repo);
}
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* [PATCH v2 2/8] sparse-checkout: add basics of 'clean' command
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
2025-07-17 1:34 ` [PATCH v2 1/8] sparse-checkout: remove use of the_repository Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-08-05 21:32 ` Elijah Newren
2025-07-17 1:34 ` [PATCH v2 3/8] sparse-checkout: match some 'clean' behavior Derrick Stolee via GitGitGadget
` (7 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
When users change their sparse-checkout definitions to add new
directories and remove old ones, there may be a few reasons why
directories no longer in scope remain (ignored or excluded files still
exist, Windows handles are still open, etc.). When these files still
exist, the sparse index feature notices that a tracked, but sparse,
directory still exists on disk and thus the index expands. This causes a
performance hit _and_ the advice printed isn't very helpful. Using 'git
clean' isn't enough (generally '-dfx' may be needed) but also this may
not be sufficient.
Add a new subcommand to 'git sparse-checkout' that removes these
tracked-but-sparse directories. This necessarily removes all files
contained within, including tracked and untracked files. Of particular
importance are ignored and excluded files which would normally be
ignored even by 'git clean -f' unless the '-x' or '-X' option is
provided. This is the most extreme method for doing this, but it works
when the sparse-checkout is in cone mode and is expected to rescope
based on directories, not files.
The current implementation always deletes these sparse directories
without warning. This is unacceptable for a released version, but those
features will be added in changes coming immediately after this one.
Note that untracked directories within the sparse-checkout remain.
Further, directories that contain staged changes or files in merge
conflict states are not deleted. This is a detail that is partly hidden
by the implementation which relies on collapsing the index to a sparse
index in-memory and only deleting directories that are listed as sparse
in the index.
If a staged change exists, then that entry is not stored as a sparse
tree entry and thus remains on-disk until committed or reset.
There are some interesting cases around merge conflict resolution, but
that will be carefully analyzed in the future.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/git-sparse-checkout.adoc | 11 ++++-
builtin/sparse-checkout.c | 64 +++++++++++++++++++++++++-
t/t1091-sparse-checkout-builtin.sh | 38 +++++++++++++++
3 files changed, 111 insertions(+), 2 deletions(-)
diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
index 529a8edd9c1e..6db88f00781d 100644
--- a/Documentation/git-sparse-checkout.adoc
+++ b/Documentation/git-sparse-checkout.adoc
@@ -9,7 +9,7 @@ git-sparse-checkout - Reduce your working tree to a subset of tracked files
SYNOPSIS
--------
[verse]
-'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules) [<options>]
+'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules | clean) [<options>]
DESCRIPTION
@@ -111,6 +111,15 @@ flags, with the same meaning as the flags from the `set` command, in order
to change which sparsity mode you are using without needing to also respecify
all sparsity paths.
+'clean'::
+ Remove all files in tracked directories that are outside of the
+ sparse-checkout definition. This subcommand requires cone-mode
+ sparse-checkout to be sure that we know which directories are
+ both tracked and all contained paths are not in the sparse-checkout.
+ This command can be used to be sure the sparse index works
+ efficiently, though it does not require enabling the sparse index
+ feature via the `index.sparse=true` configuration.
+
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
working directory to include all files.
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 61714bf80be0..6fe6ec718fe3 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -2,6 +2,7 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
+#include "abspath.h"
#include "config.h"
#include "dir.h"
#include "environment.h"
@@ -23,7 +24,7 @@
static const char *empty_base = "";
static char const * const builtin_sparse_checkout_usage[] = {
- N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules) [<options>]"),
+ N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules | clean) [<options>]"),
NULL
};
@@ -924,6 +925,66 @@ static int sparse_checkout_reapply(int argc, const char **argv,
return update_working_directory(repo, NULL);
}
+static char const * const builtin_sparse_checkout_clean_usage[] = {
+ "git sparse-checkout clean [-n|--dry-run]",
+ NULL
+};
+
+static const char *msg_remove = N_("Removing %s\n");
+
+static int sparse_checkout_clean(int argc, const char **argv,
+ const char *prefix,
+ struct repository *repo)
+{
+ struct strbuf full_path = STRBUF_INIT;
+ const char *msg = msg_remove;
+ size_t worktree_len;
+
+ struct option builtin_sparse_checkout_clean_options[] = {
+ OPT_END(),
+ };
+
+ setup_work_tree();
+ if (!repo->settings.sparse_checkout)
+ die(_("must be in a sparse-checkout to clean directories"));
+ if (!repo->settings.sparse_checkout_cone)
+ die(_("must be in a cone-mode sparse-checkout to clean directories"));
+
+ argc = parse_options(argc, argv, prefix,
+ builtin_sparse_checkout_clean_options,
+ builtin_sparse_checkout_clean_usage, 0);
+
+ if (repo_read_index(repo) < 0)
+ die(_("failed to read index"));
+
+ if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
+ repo->index->sparse_index == INDEX_EXPANDED)
+ die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
+
+ strbuf_addstr(&full_path, repo->worktree);
+ strbuf_addch(&full_path, '/');
+ worktree_len = full_path.len;
+
+ for (size_t i = 0; i < repo->index->cache_nr; i++) {
+ struct cache_entry *ce = repo->index->cache[i];
+ if (!S_ISSPARSEDIR(ce->ce_mode))
+ continue;
+ strbuf_setlen(&full_path, worktree_len);
+ strbuf_add(&full_path, ce->name, ce->ce_namelen);
+
+ if (!is_directory(full_path.buf))
+ continue;
+
+ printf(msg, ce->name);
+
+ if (remove_dir_recursively(&full_path, 0))
+ warning_errno(_("failed to remove '%s'"), ce->name);
+ }
+
+ strbuf_release(&full_path);
+ return 0;
+}
+
static char const * const builtin_sparse_checkout_disable_usage[] = {
"git sparse-checkout disable",
NULL
@@ -1079,6 +1140,7 @@ int cmd_sparse_checkout(int argc,
OPT_SUBCOMMAND("set", &fn, sparse_checkout_set),
OPT_SUBCOMMAND("add", &fn, sparse_checkout_add),
OPT_SUBCOMMAND("reapply", &fn, sparse_checkout_reapply),
+ OPT_SUBCOMMAND("clean", &fn, sparse_checkout_clean),
OPT_SUBCOMMAND("disable", &fn, sparse_checkout_disable),
OPT_SUBCOMMAND("check-rules", &fn, sparse_checkout_check_rules),
OPT_END(),
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index ab3a105ffff2..a48eedf766d2 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1050,5 +1050,43 @@ test_expect_success 'check-rules null termination' '
test_cmp expect actual
'
+test_expect_success 'clean' '
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
+ mkdir repo/deep/deeper2 repo/folder1 &&
+ touch repo/deep/deeper2/file &&
+ touch repo/folder1/file &&
+
+ cat >expect <<-\EOF &&
+ Removing deep/deeper2/
+ Removing folder1/
+ EOF
+
+ git -C repo sparse-checkout clean >out &&
+ test_cmp expect out &&
+
+ test_path_is_missing repo/deep/deeper2 &&
+ test_path_is_missing repo/folder1
+'
+
+test_expect_success 'clean with staged sparse change' '
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
+ mkdir repo/deep/deeper2 repo/folder1 repo/folder2 &&
+ touch repo/deep/deeper2/file &&
+ touch repo/folder1/file &&
+ echo dirty >repo/folder2/a &&
+
+ git -C repo add --sparse folder1/file &&
+
+ # deletes deep/deeper2/ but leaves folder1/ and folder2/
+ cat >expect <<-\EOF &&
+ Removing deep/deeper2/
+ EOF
+
+ git -C repo sparse-checkout clean >out &&
+ test_cmp expect out &&
+
+ test_path_is_missing repo/deep/deeper2 &&
+ test_path_exists repo/folder1
+'
test_done
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v2 2/8] sparse-checkout: add basics of 'clean' command
2025-07-17 1:34 ` [PATCH v2 2/8] sparse-checkout: add basics of 'clean' command Derrick Stolee via GitGitGadget
@ 2025-08-05 21:32 ` Elijah Newren
2025-09-11 13:37 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-08-05 21:32 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
Sorry for the long delay in responding...
[...]
> Add a new subcommand to 'git sparse-checkout' that removes these
> tracked-but-sparse directories. This necessarily removes all files
> contained within, including tracked and untracked files. Of particular
Nice to see tracked files also being addressed in v2.
> importance are ignored and excluded files which would normally be
> ignored even by 'git clean -f' unless the '-x' or '-X' option is
> provided. This is the most extreme method for doing this, but it works
> when the sparse-checkout is in cone mode and is expected to rescope
> based on directories, not files.
>
> The current implementation always deletes these sparse directories
> without warning. This is unacceptable for a released version, but those
> features will be added in changes coming immediately after this one.
>
> Note that untracked directories within the sparse-checkout remain.
You've changed the wording here relative to v1, but you haven't
addressed the part that was ambiguous/misleading in v1. In fact, you
may have made a different part ambiguous as well, and made readers
think that this sentence contradicts your above claims that this
command is meant to clean out untracked directories underneath sparse
directories. Perhaps something like:
"Note that untracked directories in the sparse-checkout that are not
within sparse directories will not be removed by this command; it only
cleans up paths under directories that are supposed to be sparse."
> Further, directories that contain staged changes or files in merge
> conflict states are not deleted.
Doesn't this sentence conflict with your above statement that "This
necessarily removes all files contained within, including tracked and
untracked files."?
> This is a detail that is partly hidden
> by the implementation which relies on collapsing the index to a sparse
> index in-memory and only deleting directories that are listed as sparse
> in the index.
>
> If a staged change exists, then that entry is not stored as a sparse
> tree entry and thus remains on-disk until committed or reset.
This seems a bit surprising -- if a file's modifications are staged,
then you can use it and other files in the index to write new trees
all the way up to the toplevel, so you should be able to get a sparse
tree entry without problem. I'd only expect problems if you had
unstaged changes, or higher order stages; perhaps you could clarify
here?
> There are some interesting cases around merge conflict resolution, but
> that will be carefully analyzed in the future.
...okay, so you did clarify for the higher order stages, but I'm still
confused about staged vs unstaged without conflicts.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> Documentation/git-sparse-checkout.adoc | 11 ++++-
> builtin/sparse-checkout.c | 64 +++++++++++++++++++++++++-
> t/t1091-sparse-checkout-builtin.sh | 38 +++++++++++++++
> 3 files changed, 111 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
> index 529a8edd9c1e..6db88f00781d 100644
> --- a/Documentation/git-sparse-checkout.adoc
> +++ b/Documentation/git-sparse-checkout.adoc
> @@ -9,7 +9,7 @@ git-sparse-checkout - Reduce your working tree to a subset of tracked files
> SYNOPSIS
> --------
> [verse]
> -'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules) [<options>]
> +'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules | clean) [<options>]
>
>
> DESCRIPTION
> @@ -111,6 +111,15 @@ flags, with the same meaning as the flags from the `set` command, in order
> to change which sparsity mode you are using without needing to also respecify
> all sparsity paths.
>
> +'clean'::
> + Remove all files in tracked directories that are outside of the
> + sparse-checkout definition. This subcommand requires cone-mode
So, this sentence implies that it'll wipe out all untracked or ignored
files or tracked files with either unstaged, staged, or conflicted
entries. Your commit message says it'll discuss conflicted entries
later, but conflicts about whether staged or unstaged changes will be
wiped.
> + sparse-checkout to be sure that we know which directories are
> + both tracked and all contained paths are not in the sparse-checkout.
> + This command can be used to be sure the sparse index works
> + efficiently, though it does not require enabling the sparse index
[...]
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
[...]
> + if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
> + repo->index->sparse_index == INDEX_EXPANDED)
> + die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
In the commit message, though, you said that it'd also fail to convert
a tree with a staged change to a sparse directory. And I thought in
our discussion on v1 we found out that unstaged changes would prevent
converting to sparse. Shouldn't the error message be more general,
then?
[...]
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index ab3a105ffff2..a48eedf766d2 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -1050,5 +1050,43 @@ test_expect_success 'check-rules null termination' '
> test_cmp expect actual
> '
>
> +test_expect_success 'clean' '
> + git -C repo sparse-checkout set --cone deep/deeper1 &&
> + mkdir repo/deep/deeper2 repo/folder1 &&
> + touch repo/deep/deeper2/file &&
> + touch repo/folder1/file &&
> +
> + cat >expect <<-\EOF &&
> + Removing deep/deeper2/
> + Removing folder1/
> + EOF
> +
> + git -C repo sparse-checkout clean >out &&
> + test_cmp expect out &&
> +
> + test_path_is_missing repo/deep/deeper2 &&
> + test_path_is_missing repo/folder1
> +'
> +
> +test_expect_success 'clean with staged sparse change' '
> + git -C repo sparse-checkout set --cone deep/deeper1 &&
> + mkdir repo/deep/deeper2 repo/folder1 repo/folder2 &&
> + touch repo/deep/deeper2/file &&
> + touch repo/folder1/file &&
> + echo dirty >repo/folder2/a &&
> +
> + git -C repo add --sparse folder1/file &&
> +
> + # deletes deep/deeper2/ but leaves folder1/ and folder2/
> + cat >expect <<-\EOF &&
> + Removing deep/deeper2/
> + EOF
> +
> + git -C repo sparse-checkout clean >out &&
> + test_cmp expect out &&
> +
> + test_path_is_missing repo/deep/deeper2 &&
> + test_path_exists repo/folder1
What about repo/folder2/ ?
Anyway, this test shows that neither staged nor unstaged changes are
cleaned up (which at least resolves the conflicting documentation you
provided on the matter) -- or would if you also checked repo/folder2.
What it doesn't show is that tracked files with neither staged nor
unstaged changes are not cleaned up either:
$ mkdir repo/folder2
$ echo dirty >repo/folder2/a
$ touch repo/folder2/untracked
$ cd repo
$ git status --porcelain
M folder2/a
?? folder2/untracked
# So, we have both a unstaged change and an untracked file; let's undo
the unstaged change
$ git checkout HEAD folder2/a
Updated 1 path from 8cc814f
$ git status --porcelain
?? folder2/untracked
$ ls folder2/
a untracked
# Both files are still present -- the untracked file, and the
untracked file with no changes either staged or unstaged -- what does
`git sparse-checkout clean` do?
$ git sparse-checkout clean
$ ls folder2/
a untracked
$ git status --porcelain
?? folder2/untracked
# Absolutely nothing. Not only does it not clean anything up, it
gives no warnings about not cleaning up what should be cleaned up.
Let's try sparse-checkout reapply:
$ git sparse-checkout reapply
warning: directory 'folder2/' contains untracked files, but is not in
the sparse-checkout cone
$ git status --porcelain
?? folder2/untracked
$ ls folder2/
untracked
# So `sparse-checkout reapply` does correctly remove folder2/a for us,
while warning about the untracked file. (If folder2/a would have
still had changes, it would have warned about it instead of
removing.). Let's try `sparse-checkout clean` now...
$ git sparse-checkout clean
Removing folder2/
$ git status --porcelain
$ ls folder2/
ls: cannot access 'folder2/': No such file or directory
$
I think these cases either need to be a new testcase or part of this
last testcase, and the commit message and documentation should be
clearer about tracked-and-staged, tracked-with-unstaged-changes, and
tracked-with-no-changes files...or at least comment that they'll be
discussed later in the patch series. (I have a feeling I just did a
lot of work to discover as I read your next patches that you cover
these later...)
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v2 2/8] sparse-checkout: add basics of 'clean' command
2025-08-05 21:32 ` Elijah Newren
@ 2025-09-11 13:37 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-09-11 13:37 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 8/5/25 5:32 PM, Elijah Newren wrote:
> On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>
> Sorry for the long delay in responding...
>
> [...]
>> Add a new subcommand to 'git sparse-checkout' that removes these
>> tracked-but-sparse directories. This necessarily removes all files
>> contained within, including tracked and untracked files. Of particular
>
> Nice to see tracked files also being addressed in v2.
>
>> importance are ignored and excluded files which would normally be
>> ignored even by 'git clean -f' unless the '-x' or '-X' option is
>> provided. This is the most extreme method for doing this, but it works
>> when the sparse-checkout is in cone mode and is expected to rescope
>> based on directories, not files.
>>
>> The current implementation always deletes these sparse directories
>> without warning. This is unacceptable for a released version, but those
>> features will be added in changes coming immediately after this one.
>>
>> Note that untracked directories within the sparse-checkout remain.
>
> You've changed the wording here relative to v1, but you haven't
> addressed the part that was ambiguous/misleading in v1. In fact, you
> may have made a different part ambiguous as well, and made readers
> think that this sentence contradicts your above claims that this
> command is meant to clean out untracked directories underneath sparse
> directories. Perhaps something like:
>
> "Note that untracked directories in the sparse-checkout that are not
> within sparse directories will not be removed by this command; it only
> cleans up paths under directories that are supposed to be sparse."
Both here and in the documentation, things can get a bit confusing.
In the v3 I'm preparing, I'm taking the following approach:
* In the commit message, focus on the implementation details and how
that impacts the behavior of the tool.
* In the documentation, focus on the list of files that will be
"considered for removal". Use the most broad definition there:
in a tracked directory that is outside of the sparse-checkout.
Add pointers that could explain exceptions and how to remove
these exceptions, but don't attempt to explain all special
cases.
>> +test_expect_success 'clean with staged sparse change' '
>> + git -C repo sparse-checkout set --cone deep/deeper1 &&
>> + mkdir repo/deep/deeper2 repo/folder1 repo/folder2 &&
>> + touch repo/deep/deeper2/file &&
>> + touch repo/folder1/file &&
>> + echo dirty >repo/folder2/a &&
>> +
>> + git -C repo add --sparse folder1/file &&
>> +
>> + # deletes deep/deeper2/ but leaves folder1/ and folder2/
>> + cat >expect <<-\EOF &&
>> + Removing deep/deeper2/
>> + EOF
>> +
>> + git -C repo sparse-checkout clean >out &&
>> + test_cmp expect out &&
>> +
>> + test_path_is_missing repo/deep/deeper2 &&
>> + test_path_exists repo/folder1
>
> What about repo/folder2/ ?
>
> Anyway, this test shows that neither staged nor unstaged changes are
> cleaned up (which at least resolves the conflicting documentation you
> provided on the matter) -- or would if you also checked repo/folder2.
>
> What it doesn't show is that tracked files with neither staged nor
> unstaged changes are not cleaned up either:
>
> $ mkdir repo/folder2
> $ echo dirty >repo/folder2/a
> $ touch repo/folder2/untracked
> $ cd repo
> $ git status --porcelain
> M folder2/a
> ?? folder2/untracked
>
> # So, we have both a unstaged change and an untracked file; let's undo
> the unstaged change
>
> $ git checkout HEAD folder2/a
> Updated 1 path from 8cc814f
> $ git status --porcelain
> ?? folder2/untracked
> $ ls folder2/
> a untracked
>
> # Both files are still present -- the untracked file, and the
> untracked file with no changes either staged or unstaged -- what does
> `git sparse-checkout clean` do?
It seems that the unstaged modification to a tracked, sparse file
is enough to prevent the sparse directory collapse. This is
similar to how 'git sparse-checkout reapply' will refuse to remove
those modified changes. I'll be sure to update my advice around
special cases to include this (and lock it in with a test case).
> $ git sparse-checkout clean
> $ ls folder2/
> a untracked
> $ git status --porcelain
> ?? folder2/untracked
>
> # Absolutely nothing. Not only does it not clean anything up, it
> gives no warnings about not cleaning up what should be cleaned up.
At this point, the SKIP_WORKTREE bit is still removed because
we've staged the change.
> Let's try sparse-checkout reapply:
>
> $ git sparse-checkout reapply
> warning: directory 'folder2/' contains untracked files, but is not in
> the sparse-checkout cone
> $ git status --porcelain
> ?? folder2/untracked
> $ ls folder2/
> untracked
>
> # So `sparse-checkout reapply` does correctly remove folder2/a for us,
> while warning about the untracked file. (If folder2/a would have
> still had changes, it would have warned about it instead of
> removing.). Let's try `sparse-checkout clean` now...
>
> $ git sparse-checkout clean
> Removing folder2/
> $ git status --porcelain
> $ ls folder2/
> ls: cannot access 'folder2/': No such file or directory
> $
>
> I think these cases either need to be a new testcase or part of this
> last testcase, and the commit message and documentation should be
> clearer about tracked-and-staged, tracked-with-unstaged-changes, and
> tracked-with-no-changes files...or at least comment that they'll be
> discussed later in the patch series. (I have a feeling I just did a
> lot of work to discover as I read your next patches that you cover
> these later...)
No! you found interesting ways to test special cases. Thanks!
Describing the lifecycle of a sparse file (with sibling untracked
change) going from modified to staged to sparse to unlock the
cleaning would be helpful documentation.
I do think there is an interesting extra functionality that we
should consider for the future: "What files are in my worktree
that _should_ be sparse? Why is 'clean' not removing them?"
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v2 3/8] sparse-checkout: match some 'clean' behavior
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
2025-07-17 1:34 ` [PATCH v2 1/8] sparse-checkout: remove use of the_repository Derrick Stolee via GitGitGadget
2025-07-17 1:34 ` [PATCH v2 2/8] sparse-checkout: add basics of 'clean' command Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-08-05 22:06 ` Elijah Newren
2025-07-17 1:34 ` [PATCH v2 4/8] dir: add generic "walk all files" helper Derrick Stolee via GitGitGadget
` (6 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The 'git sparse-checkout clean' subcommand is somewhat similar to 'git
clean' in that it will delete files that should not be in the worktree.
The big difference is that it focuses on the directories that should not
be in the worktree due to cone-mode sparse-checkout. It also does not
discriminate in the kinds of files and focuses on deleting entire
directories.
However, there are some restrictions that would be good to bring over
from 'git clean', specifically how it refuses to do anything without the
'-f'/'--force' or '-n'/'--dry-run' arguments. The 'clean.requireForce'
config can be set to 'false' to imply '--force'.
Add this behavior to avoid accidental deletion of files that cannot be
recovered from Git.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/git-sparse-checkout.adoc | 9 ++++
builtin/sparse-checkout.c | 15 +++++-
t/t1091-sparse-checkout-builtin.sh | 66 +++++++++++++++++++++++++-
3 files changed, 87 insertions(+), 3 deletions(-)
diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
index 6db88f00781d..823a66c40bc5 100644
--- a/Documentation/git-sparse-checkout.adoc
+++ b/Documentation/git-sparse-checkout.adoc
@@ -119,6 +119,15 @@ all sparsity paths.
This command can be used to be sure the sparse index works
efficiently, though it does not require enabling the sparse index
feature via the `index.sparse=true` configuration.
++
+To prevent accidental deletion of worktree files, the `clean` subcommand
+will not delete any files without the `-f` or `--force` option, unless
+the `clean.requireForce` config option is set to `false`.
++
+The `--dry-run` option will list the directories that would be removed
+without deleting them. Running in this mode can be helpful to predict the
+behavior of the clean comand or to determine which kinds of files are left
+in the sparse directories.
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 6fe6ec718fe3..fe332ff5f941 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -931,6 +931,7 @@ static char const * const builtin_sparse_checkout_clean_usage[] = {
};
static const char *msg_remove = N_("Removing %s\n");
+static const char *msg_would_remove = N_("Would remove %s\n");
static int sparse_checkout_clean(int argc, const char **argv,
const char *prefix,
@@ -939,8 +940,12 @@ static int sparse_checkout_clean(int argc, const char **argv,
struct strbuf full_path = STRBUF_INIT;
const char *msg = msg_remove;
size_t worktree_len;
+ int force = 0, dry_run = 0;
+ int require_force = 1;
struct option builtin_sparse_checkout_clean_options[] = {
+ OPT__DRY_RUN(&dry_run, N_("dry run")),
+ OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE),
OPT_END(),
};
@@ -954,6 +959,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
builtin_sparse_checkout_clean_options,
builtin_sparse_checkout_clean_usage, 0);
+ repo_config_get_bool(repo, "clean.requireforce", &require_force);
+ if (require_force && !force && !dry_run)
+ die(_("for safety, refusing to clean without one of --force or --dry-run"));
+
+ if (dry_run)
+ msg = msg_would_remove;
+
if (repo_read_index(repo) < 0)
die(_("failed to read index"));
@@ -977,7 +989,8 @@ static int sparse_checkout_clean(int argc, const char **argv,
printf(msg, ce->name);
- if (remove_dir_recursively(&full_path, 0))
+ if (dry_run <= 0 &&
+ remove_dir_recursively(&full_path, 0))
warning_errno(_("failed to remove '%s'"), ce->name);
}
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index a48eedf766d2..69f5a6dcc689 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1056,12 +1056,29 @@ test_expect_success 'clean' '
touch repo/deep/deeper2/file &&
touch repo/folder1/file &&
+ test_must_fail git -C repo sparse-checkout clean 2>err &&
+ grep "refusing to clean" err &&
+
+ git -C repo config clean.requireForce true &&
+ test_must_fail git -C repo sparse-checkout clean 2>err &&
+ grep "refusing to clean" err &&
+
+ cat >expect <<-\EOF &&
+ Would remove deep/deeper2/
+ Would remove folder1/
+ EOF
+
+ git -C repo sparse-checkout clean --dry-run >out &&
+ test_cmp expect out &&
+ test_path_exists repo/deep/deeper2 &&
+ test_path_exists repo/folder1 &&
+
cat >expect <<-\EOF &&
Removing deep/deeper2/
Removing folder1/
EOF
- git -C repo sparse-checkout clean >out &&
+ git -C repo sparse-checkout clean -f >out &&
test_cmp expect out &&
test_path_is_missing repo/deep/deeper2 &&
@@ -1077,16 +1094,61 @@ test_expect_success 'clean with staged sparse change' '
git -C repo add --sparse folder1/file &&
+ cat >expect <<-\EOF &&
+ Would remove deep/deeper2/
+ EOF
+
+ git -C repo sparse-checkout clean --dry-run >out &&
+ test_cmp expect out &&
+ test_path_exists repo/deep/deeper2 &&
+ test_path_exists repo/folder1 &&
+ test_path_exists repo/folder2 &&
+
# deletes deep/deeper2/ but leaves folder1/ and folder2/
cat >expect <<-\EOF &&
Removing deep/deeper2/
EOF
+ # The previous test case checked the -f option, so
+ # test the config option in this one.
+ git -C repo config clean.requireForce false &&
git -C repo sparse-checkout clean >out &&
test_cmp expect out &&
test_path_is_missing repo/deep/deeper2 &&
- test_path_exists repo/folder1
+ test_path_exists repo/folder1 &&
+ test_path_exists repo/folder2
+'
+
+test_expect_success 'clean with merge conflict status' '
+ git clone repo clean-merge &&
+
+ echo dirty >clean-merge/deep/deeper2/a &&
+ touch clean-merge/folder2/extra &&
+
+ cat >input <<-EOF &&
+ 0 $ZERO_OID folder1/a
+ 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a
+ EOF
+ git -C clean-merge update-index --index-info <input &&
+
+ git -C clean-merge sparse-checkout set deep/deeper1 &&
+
+ test_must_fail git -C clean-merge sparse-checkout clean -f 2>err &&
+ grep "failed to convert index to a sparse index" err &&
+
+ mkdir -p clean-merge/folder1/ &&
+ echo merged >clean-merge/folder1/a &&
+ git -C clean-merge add --sparse folder1/a &&
+
+ # deletes folder2/ but leaves staged change in folder1
+ # and dirty change in deep/deeper2/
+ cat >expect <<-\EOF &&
+ Removing folder2/
+ EOF
+
+ git -C clean-merge sparse-checkout clean -f >out &&
+ test_cmp expect out
'
test_done
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v2 3/8] sparse-checkout: match some 'clean' behavior
2025-07-17 1:34 ` [PATCH v2 3/8] sparse-checkout: match some 'clean' behavior Derrick Stolee via GitGitGadget
@ 2025-08-05 22:06 ` Elijah Newren
2025-09-11 13:52 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-08-05 22:06 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> The 'git sparse-checkout clean' subcommand is somewhat similar to 'git
> clean' in that it will delete files that should not be in the worktree.
> The big difference is that it focuses on the directories that should not
> be in the worktree due to cone-mode sparse-checkout. It also does not
> discriminate in the kinds of files and focuses on deleting entire
> directories.
>
> However, there are some restrictions that would be good to bring over
> from 'git clean', specifically how it refuses to do anything without the
> '-f'/'--force' or '-n'/'--dry-run' arguments. The 'clean.requireForce'
> config can be set to 'false' to imply '--force'.
>
> Add this behavior to avoid accidental deletion of files that cannot be
> recovered from Git.
I'm a bit surprised by this. Given that the only kinds of files that
this command cleans out are untracked and ignored files, and Junio's
comments about clean.requireForce over in
https://lore.kernel.org/git/xmqqv7o2togi.fsf@gitster.g/, I thought his
comments could be interpreted as not wanting clean.requireForce to
apply in more places. Did I misunderstand?
Alternatively, maybe you thought that there were files other than
untracked and ignored which `sparse-checkout clean` would clean up,
and it was because of those files that we wanted the extra protection?
(In that case, it'd make sense, but it seems to go against what was
demonstrated in the final testcase of the previous patch.)
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> Documentation/git-sparse-checkout.adoc | 9 ++++
> builtin/sparse-checkout.c | 15 +++++-
> t/t1091-sparse-checkout-builtin.sh | 66 +++++++++++++++++++++++++-
> 3 files changed, 87 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
> index 6db88f00781d..823a66c40bc5 100644
> --- a/Documentation/git-sparse-checkout.adoc
> +++ b/Documentation/git-sparse-checkout.adoc
> @@ -119,6 +119,15 @@ all sparsity paths.
> This command can be used to be sure the sparse index works
> efficiently, though it does not require enabling the sparse index
> feature via the `index.sparse=true` configuration.
> ++
> +To prevent accidental deletion of worktree files, the `clean` subcommand
> +will not delete any files without the `-f` or `--force` option, unless
> +the `clean.requireForce` config option is set to `false`.
> ++
> +The `--dry-run` option will list the directories that would be removed
> +without deleting them. Running in this mode can be helpful to predict the
> +behavior of the clean comand or to determine which kinds of files are left
> +in the sparse directories.
>
> 'disable'::
> Disable the `core.sparseCheckout` config setting, and restore the
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index 6fe6ec718fe3..fe332ff5f941 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -931,6 +931,7 @@ static char const * const builtin_sparse_checkout_clean_usage[] = {
> };
>
> static const char *msg_remove = N_("Removing %s\n");
> +static const char *msg_would_remove = N_("Would remove %s\n");
>
> static int sparse_checkout_clean(int argc, const char **argv,
> const char *prefix,
> @@ -939,8 +940,12 @@ static int sparse_checkout_clean(int argc, const char **argv,
> struct strbuf full_path = STRBUF_INIT;
> const char *msg = msg_remove;
> size_t worktree_len;
> + int force = 0, dry_run = 0;
> + int require_force = 1;
>
> struct option builtin_sparse_checkout_clean_options[] = {
> + OPT__DRY_RUN(&dry_run, N_("dry run")),
> + OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE),
> OPT_END(),
> };
>
> @@ -954,6 +959,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
> builtin_sparse_checkout_clean_options,
> builtin_sparse_checkout_clean_usage, 0);
>
> + repo_config_get_bool(repo, "clean.requireforce", &require_force);
> + if (require_force && !force && !dry_run)
> + die(_("for safety, refusing to clean without one of --force or --dry-run"));
> +
> + if (dry_run)
> + msg = msg_would_remove;
> +
> if (repo_read_index(repo) < 0)
> die(_("failed to read index"));
>
> @@ -977,7 +989,8 @@ static int sparse_checkout_clean(int argc, const char **argv,
>
> printf(msg, ce->name);
>
> - if (remove_dir_recursively(&full_path, 0))
> + if (dry_run <= 0 &&
> + remove_dir_recursively(&full_path, 0))
> warning_errno(_("failed to remove '%s'"), ce->name);
> }
>
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index a48eedf766d2..69f5a6dcc689 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -1056,12 +1056,29 @@ test_expect_success 'clean' '
> touch repo/deep/deeper2/file &&
> touch repo/folder1/file &&
>
> + test_must_fail git -C repo sparse-checkout clean 2>err &&
> + grep "refusing to clean" err &&
> +
> + git -C repo config clean.requireForce true &&
> + test_must_fail git -C repo sparse-checkout clean 2>err &&
> + grep "refusing to clean" err &&
> +
> + cat >expect <<-\EOF &&
> + Would remove deep/deeper2/
> + Would remove folder1/
> + EOF
> +
> + git -C repo sparse-checkout clean --dry-run >out &&
> + test_cmp expect out &&
> + test_path_exists repo/deep/deeper2 &&
> + test_path_exists repo/folder1 &&
> +
> cat >expect <<-\EOF &&
> Removing deep/deeper2/
> Removing folder1/
> EOF
>
> - git -C repo sparse-checkout clean >out &&
> + git -C repo sparse-checkout clean -f >out &&
> test_cmp expect out &&
>
> test_path_is_missing repo/deep/deeper2 &&
> @@ -1077,16 +1094,61 @@ test_expect_success 'clean with staged sparse change' '
>
> git -C repo add --sparse folder1/file &&
>
> + cat >expect <<-\EOF &&
> + Would remove deep/deeper2/
> + EOF
> +
> + git -C repo sparse-checkout clean --dry-run >out &&
> + test_cmp expect out &&
> + test_path_exists repo/deep/deeper2 &&
> + test_path_exists repo/folder1 &&
> + test_path_exists repo/folder2 &&
> +
> # deletes deep/deeper2/ but leaves folder1/ and folder2/
> cat >expect <<-\EOF &&
> Removing deep/deeper2/
> EOF
>
> + # The previous test case checked the -f option, so
> + # test the config option in this one.
> + git -C repo config clean.requireForce false &&
> git -C repo sparse-checkout clean >out &&
> test_cmp expect out &&
>
> test_path_is_missing repo/deep/deeper2 &&
> - test_path_exists repo/folder1
> + test_path_exists repo/folder1 &&
> + test_path_exists repo/folder2
> +'
> +
> +test_expect_success 'clean with merge conflict status' '
> + git clone repo clean-merge &&
> +
> + echo dirty >clean-merge/deep/deeper2/a &&
> + touch clean-merge/folder2/extra &&
> +
> + cat >input <<-EOF &&
> + 0 $ZERO_OID folder1/a
> + 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a
> + EOF
> + git -C clean-merge update-index --index-info <input &&
> +
> + git -C clean-merge sparse-checkout set deep/deeper1 &&
> +
> + test_must_fail git -C clean-merge sparse-checkout clean -f 2>err &&
> + grep "failed to convert index to a sparse index" err &&
Oh, interesting...with merge conflicts you at least get an error that
it can't convert, whereas when there are tracked files (whether with
staged changes or unstaged changes or no changes), you don't? That
seems to at least be good for the merge conflicts case, but it seems
like there's something to fix with the non-conflicted tracked files.
But that's kind of tangential to this patch.
> + mkdir -p clean-merge/folder1/ &&
> + echo merged >clean-merge/folder1/a &&
> + git -C clean-merge add --sparse folder1/a &&
> +
> + # deletes folder2/ but leaves staged change in folder1
> + # and dirty change in deep/deeper2/
> + cat >expect <<-\EOF &&
> + Removing folder2/
> + EOF
> +
> + git -C clean-merge sparse-checkout clean -f >out &&
> + test_cmp expect out
> '
>
> test_done
> --
> gitgitgadget
Patch appears to correctly implement what was stated in the commit message.
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v2 3/8] sparse-checkout: match some 'clean' behavior
2025-08-05 22:06 ` Elijah Newren
@ 2025-09-11 13:52 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-09-11 13:52 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 8/5/25 6:06 PM, Elijah Newren wrote:
> On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Derrick Stolee <stolee@gmail.com>
>>
>> The 'git sparse-checkout clean' subcommand is somewhat similar to 'git
>> clean' in that it will delete files that should not be in the worktree.
>> The big difference is that it focuses on the directories that should not
>> be in the worktree due to cone-mode sparse-checkout. It also does not
>> discriminate in the kinds of files and focuses on deleting entire
>> directories.
>>
>> However, there are some restrictions that would be good to bring over
>> from 'git clean', specifically how it refuses to do anything without the
>> '-f'/'--force' or '-n'/'--dry-run' arguments. The 'clean.requireForce'
>> config can be set to 'false' to imply '--force'.
>>
>> Add this behavior to avoid accidental deletion of files that cannot be
>> recovered from Git.
>
> I'm a bit surprised by this. Given that the only kinds of files that
> this command cleans out are untracked and ignored files, and Junio's
> comments about clean.requireForce over in
> https://lore.kernel.org/git/xmqqv7o2togi.fsf@gitster.g/, I thought his
> comments could be interpreted as not wanting clean.requireForce to
> apply in more places. Did I misunderstand?
>
> Alternatively, maybe you thought that there were files other than
> untracked and ignored which `sparse-checkout clean` would clean up,
> and it was because of those files that we wanted the extra protection?
> (In that case, it'd make sense, but it seems to go against what was
> demonstrated in the final testcase of the previous patch.)
My thought process here was that users expect 'git clean' to be extra
careful to prevent removing files. While Junio mentioned regret in
the decision to require the '-f', I didn't want to have such a major
difference between the two commands.
I also interpreted Junio's comments to be that he wished that
clean.requireForce was 'false' by default instead of the current
assumption of 'true' if unset.
I'm open to reviewers providing a firm stance towards "the new
command should deviate closer to how we wish 'git clean' worked"
and overriding my choice to match behavior.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v2 4/8] dir: add generic "walk all files" helper
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (2 preceding siblings ...)
2025-07-17 1:34 ` [PATCH v2 3/8] sparse-checkout: match some 'clean' behavior Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-08-05 22:22 ` Elijah Newren
2025-07-17 1:34 ` [PATCH v2 5/8] sparse-checkout: add --verbose option to 'clean' Derrick Stolee via GitGitGadget
` (5 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
There is sometimes a need to visit every file within a directory,
recursively. The main example is remove_dir_recursively(), though it has
some extra flags that make it want to iterate over paths in a custom
way. There is also the fill_directory() approach but that involves an
index and a pathspec.
This change adds a new for_each_file_in_dir() method that will be
helpful in the next change.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
dir.c | 28 ++++++++++++++++++++++++++++
dir.h | 14 ++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/dir.c b/dir.c
index d2b0a5aef670..2e567ff92746 100644
--- a/dir.c
+++ b/dir.c
@@ -30,6 +30,7 @@
#include "read-cache-ll.h"
#include "setup.h"
#include "sparse-index.h"
+#include "strbuf.h"
#include "submodule-config.h"
#include "symlinks.h"
#include "trace2.h"
@@ -87,6 +88,33 @@ struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp)
return e;
}
+int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data)
+{
+ struct dirent *e;
+ int res = 0;
+ size_t baselen = path->len;
+ DIR *dir = opendir(path->buf);
+
+ if (!dir)
+ return 0;
+
+ while (!res && (e = readdir_skip_dot_and_dotdot(dir)) != NULL) {
+ unsigned char dtype = get_dtype(e, path, 0);
+ strbuf_setlen(path, baselen);
+ strbuf_addstr(path, e->d_name);
+
+ if (dtype == DT_REG) {
+ res = fn(path->buf, data);
+ } else if (dtype == DT_DIR) {
+ strbuf_addch(path, '/');
+ res = for_each_file_in_dir(path, fn, data);
+ }
+ }
+
+ closedir(dir);
+ return res;
+}
+
int count_slashes(const char *s)
{
int cnt = 0;
diff --git a/dir.h b/dir.h
index d7e71aa8daa7..f4235cc12a2f 100644
--- a/dir.h
+++ b/dir.h
@@ -536,6 +536,20 @@ int get_sparse_checkout_patterns(struct pattern_list *pl);
*/
int remove_dir_recursively(struct strbuf *path, int flag);
+/*
+ * This function pointer type is called on each file discovered in
+ * for_each_file_in_dir. The iteration stops if this method returns
+ * non-zero.
+ */
+typedef int (*file_iterator)(const char *path, const void *data);
+
+struct strbuf;
+/*
+ * Given a directory path, recursively visit each file within, including
+ * within subdirectories.
+ */
+int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data);
+
/*
* Tries to remove the path, along with leading empty directories so long as
* those empty directories are not startup_info->original_cwd. Ignores
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v2 4/8] dir: add generic "walk all files" helper
2025-07-17 1:34 ` [PATCH v2 4/8] dir: add generic "walk all files" helper Derrick Stolee via GitGitGadget
@ 2025-08-05 22:22 ` Elijah Newren
0 siblings, 0 replies; 69+ messages in thread
From: Elijah Newren @ 2025-08-05 22:22 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> There is sometimes a need to visit every file within a directory,
> recursively. The main example is remove_dir_recursively(), though it has
> some extra flags that make it want to iterate over paths in a custom
> way. There is also the fill_directory() approach but that involves an
> index and a pathspec.
>
> This change adds a new for_each_file_in_dir() method that will be
> helpful in the next change.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> dir.c | 28 ++++++++++++++++++++++++++++
> dir.h | 14 ++++++++++++++
> 2 files changed, 42 insertions(+)
>
> diff --git a/dir.c b/dir.c
> index d2b0a5aef670..2e567ff92746 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -30,6 +30,7 @@
> #include "read-cache-ll.h"
> #include "setup.h"
> #include "sparse-index.h"
> +#include "strbuf.h"
> #include "submodule-config.h"
> #include "symlinks.h"
> #include "trace2.h"
> @@ -87,6 +88,33 @@ struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp)
> return e;
> }
>
> +int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data)
> +{
> + struct dirent *e;
> + int res = 0;
> + size_t baselen = path->len;
> + DIR *dir = opendir(path->buf);
> +
> + if (!dir)
> + return 0;
> +
> + while (!res && (e = readdir_skip_dot_and_dotdot(dir)) != NULL) {
> + unsigned char dtype = get_dtype(e, path, 0);
> + strbuf_setlen(path, baselen);
> + strbuf_addstr(path, e->d_name);
> +
> + if (dtype == DT_REG) {
> + res = fn(path->buf, data);
> + } else if (dtype == DT_DIR) {
> + strbuf_addch(path, '/');
> + res = for_each_file_in_dir(path, fn, data);
> + }
> + }
> +
> + closedir(dir);
> + return res;
> +}
> +
> int count_slashes(const char *s)
> {
> int cnt = 0;
> diff --git a/dir.h b/dir.h
> index d7e71aa8daa7..f4235cc12a2f 100644
> --- a/dir.h
> +++ b/dir.h
> @@ -536,6 +536,20 @@ int get_sparse_checkout_patterns(struct pattern_list *pl);
> */
> int remove_dir_recursively(struct strbuf *path, int flag);
>
> +/*
> + * This function pointer type is called on each file discovered in
> + * for_each_file_in_dir. The iteration stops if this method returns
> + * non-zero.
> + */
> +typedef int (*file_iterator)(const char *path, const void *data);
> +
> +struct strbuf;
> +/*
> + * Given a directory path, recursively visit each file within, including
> + * within subdirectories.
> + */
> +int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data);
> +
> /*
> * Tries to remove the path, along with leading empty directories so long as
> * those empty directories are not startup_info->original_cwd. Ignores
> --
> gitgitgadget
Looks reasonable.
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v2 5/8] sparse-checkout: add --verbose option to 'clean'
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (3 preceding siblings ...)
2025-07-17 1:34 ` [PATCH v2 4/8] dir: add generic "walk all files" helper Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-08-05 22:22 ` Elijah Newren
2025-07-17 1:34 ` [PATCH v2 6/8] sparse-index: point users to new 'clean' action Derrick Stolee via GitGitGadget
` (4 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The 'git sparse-checkout clean' subcommand is focused on directories,
deleting any tracked sparse directories to clean up the worktree and
make the sparse index feature work optimally.
However, this directory-focused approach can leave users wondering why
those directories exist at all. In my experience, these files are left
over due to ignore or exclude patterns, Windows file handles, or
possibly merge conflict resolutions.
Add a new '--verbose' option for users to see all the files that are
being deleted (with '--force') or would be deleted (with '--dry-run').
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/git-sparse-checkout.adoc | 5 +++++
builtin/sparse-checkout.c | 28 ++++++++++++++++++++++++--
t/t1091-sparse-checkout-builtin.sh | 14 ++++++++++---
3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
index 823a66c40bc5..604f53f77caf 100644
--- a/Documentation/git-sparse-checkout.adoc
+++ b/Documentation/git-sparse-checkout.adoc
@@ -128,6 +128,11 @@ The `--dry-run` option will list the directories that would be removed
without deleting them. Running in this mode can be helpful to predict the
behavior of the clean comand or to determine which kinds of files are left
in the sparse directories.
++
+The `--verbose` option will list every file within the directories that
+are considered for removal. This option is helpful to determine if those
+files are actually important or perhaps to explain why the directory is
+still present despite the current sparse-checkout.
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index fe332ff5f941..f38a0809c098 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -930,6 +930,26 @@ static char const * const builtin_sparse_checkout_clean_usage[] = {
NULL
};
+static int list_file_iterator(const char *path, const void *data)
+{
+ const char *msg = data;
+
+ printf(msg, path);
+ return 0;
+}
+
+static void list_every_file_in_dir(const char *msg,
+ const char *directory)
+{
+ struct strbuf path = STRBUF_INIT;
+
+ strbuf_addstr(&path, directory);
+ fprintf(stderr, "list every file in %s\n", directory);
+
+ for_each_file_in_dir(&path, list_file_iterator, msg);
+ strbuf_release(&path);
+}
+
static const char *msg_remove = N_("Removing %s\n");
static const char *msg_would_remove = N_("Would remove %s\n");
@@ -940,12 +960,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
struct strbuf full_path = STRBUF_INIT;
const char *msg = msg_remove;
size_t worktree_len;
- int force = 0, dry_run = 0;
+ int force = 0, dry_run = 0, verbose = 0;
int require_force = 1;
struct option builtin_sparse_checkout_clean_options[] = {
OPT__DRY_RUN(&dry_run, N_("dry run")),
OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE),
+ OPT__VERBOSE(&verbose, N_("report each affected file, not just directories")),
OPT_END(),
};
@@ -987,7 +1008,10 @@ static int sparse_checkout_clean(int argc, const char **argv,
if (!is_directory(full_path.buf))
continue;
- printf(msg, ce->name);
+ if (verbose)
+ list_every_file_in_dir(msg, ce->name);
+ else
+ printf(msg, ce->name);
if (dry_run <= 0 &&
remove_dir_recursively(&full_path, 0))
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 69f5a6dcc689..9a89b902c3f5 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1052,9 +1052,9 @@ test_expect_success 'check-rules null termination' '
test_expect_success 'clean' '
git -C repo sparse-checkout set --cone deep/deeper1 &&
- mkdir repo/deep/deeper2 repo/folder1 &&
+ mkdir -p repo/deep/deeper2 repo/folder1/extra/inside &&
touch repo/deep/deeper2/file &&
- touch repo/folder1/file &&
+ touch repo/folder1/extra/inside/file &&
test_must_fail git -C repo sparse-checkout clean 2>err &&
grep "refusing to clean" err &&
@@ -1071,7 +1071,15 @@ test_expect_success 'clean' '
git -C repo sparse-checkout clean --dry-run >out &&
test_cmp expect out &&
test_path_exists repo/deep/deeper2 &&
- test_path_exists repo/folder1 &&
+ test_path_exists repo/folder1/extra/inside/file &&
+
+ cat >expect <<-\EOF &&
+ Would remove deep/deeper2/file
+ Would remove folder1/extra/inside/file
+ EOF
+
+ git -C repo sparse-checkout clean --dry-run --verbose >out &&
+ test_cmp expect out &&
cat >expect <<-\EOF &&
Removing deep/deeper2/
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v2 5/8] sparse-checkout: add --verbose option to 'clean'
2025-07-17 1:34 ` [PATCH v2 5/8] sparse-checkout: add --verbose option to 'clean' Derrick Stolee via GitGitGadget
@ 2025-08-05 22:22 ` Elijah Newren
2025-09-11 14:06 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-08-05 22:22 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> The 'git sparse-checkout clean' subcommand is focused on directories,
> deleting any tracked sparse directories to clean up the worktree and
> make the sparse index feature work optimally.
>
> However, this directory-focused approach can leave users wondering why
> those directories exist at all. In my experience, these files are left
> over due to ignore or exclude patterns, Windows file handles, or
> possibly merge conflict resolutions.
Seems reasonable. And based on your previous testcases, it might not
even be merge conflict resolutions, but just someone placing a
(possibly-modified) copy of a tracked file back into the directory.
(I've seen folks do that, so it's not "just" your testcase doing
something unusual.)
> Add a new '--verbose' option for users to see all the files that are
> being deleted (with '--force') or would be deleted (with '--dry-run').
Does that answer the users' question? You said above in your
experience it came from a few different reasons; will users want to
know which reason(s) for which files, or will they only want to know
the files that are present?
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> Documentation/git-sparse-checkout.adoc | 5 +++++
> builtin/sparse-checkout.c | 28 ++++++++++++++++++++++++--
> t/t1091-sparse-checkout-builtin.sh | 14 ++++++++++---
> 3 files changed, 42 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
> index 823a66c40bc5..604f53f77caf 100644
> --- a/Documentation/git-sparse-checkout.adoc
> +++ b/Documentation/git-sparse-checkout.adoc
> @@ -128,6 +128,11 @@ The `--dry-run` option will list the directories that would be removed
> without deleting them. Running in this mode can be helpful to predict the
> behavior of the clean comand or to determine which kinds of files are left
> in the sparse directories.
> ++
> +The `--verbose` option will list every file within the directories that
> +are considered for removal. This option is helpful to determine if those
> +files are actually important or perhaps to explain why the directory is
> +still present despite the current sparse-checkout.
>
> 'disable'::
> Disable the `core.sparseCheckout` config setting, and restore the
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index fe332ff5f941..f38a0809c098 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -930,6 +930,26 @@ static char const * const builtin_sparse_checkout_clean_usage[] = {
> NULL
> };
>
> +static int list_file_iterator(const char *path, const void *data)
> +{
> + const char *msg = data;
> +
> + printf(msg, path);
> + return 0;
> +}
> +
> +static void list_every_file_in_dir(const char *msg,
> + const char *directory)
> +{
> + struct strbuf path = STRBUF_INIT;
> +
> + strbuf_addstr(&path, directory);
> + fprintf(stderr, "list every file in %s\n", directory);
> +
> + for_each_file_in_dir(&path, list_file_iterator, msg);
> + strbuf_release(&path);
> +}
> +
> static const char *msg_remove = N_("Removing %s\n");
> static const char *msg_would_remove = N_("Would remove %s\n");
>
> @@ -940,12 +960,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
> struct strbuf full_path = STRBUF_INIT;
> const char *msg = msg_remove;
> size_t worktree_len;
> - int force = 0, dry_run = 0;
> + int force = 0, dry_run = 0, verbose = 0;
> int require_force = 1;
>
> struct option builtin_sparse_checkout_clean_options[] = {
> OPT__DRY_RUN(&dry_run, N_("dry run")),
> OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE),
> + OPT__VERBOSE(&verbose, N_("report each affected file, not just directories")),
> OPT_END(),
> };
>
> @@ -987,7 +1008,10 @@ static int sparse_checkout_clean(int argc, const char **argv,
> if (!is_directory(full_path.buf))
> continue;
>
> - printf(msg, ce->name);
> + if (verbose)
> + list_every_file_in_dir(msg, ce->name);
> + else
> + printf(msg, ce->name);
>
> if (dry_run <= 0 &&
> remove_dir_recursively(&full_path, 0))
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index 69f5a6dcc689..9a89b902c3f5 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -1052,9 +1052,9 @@ test_expect_success 'check-rules null termination' '
>
> test_expect_success 'clean' '
> git -C repo sparse-checkout set --cone deep/deeper1 &&
> - mkdir repo/deep/deeper2 repo/folder1 &&
> + mkdir -p repo/deep/deeper2 repo/folder1/extra/inside &&
> touch repo/deep/deeper2/file &&
> - touch repo/folder1/file &&
> + touch repo/folder1/extra/inside/file &&
>
> test_must_fail git -C repo sparse-checkout clean 2>err &&
> grep "refusing to clean" err &&
> @@ -1071,7 +1071,15 @@ test_expect_success 'clean' '
> git -C repo sparse-checkout clean --dry-run >out &&
> test_cmp expect out &&
> test_path_exists repo/deep/deeper2 &&
> - test_path_exists repo/folder1 &&
> + test_path_exists repo/folder1/extra/inside/file &&
> +
> + cat >expect <<-\EOF &&
> + Would remove deep/deeper2/file
> + Would remove folder1/extra/inside/file
> + EOF
> +
> + git -C repo sparse-checkout clean --dry-run --verbose >out &&
> + test_cmp expect out &&
>
> cat >expect <<-\EOF &&
> Removing deep/deeper2/
> --
> gitgitgadget
You stated in the commit message that "users wonder...why those
directories exist at all." Presuming that listing files is sufficient
to answer those users questions, this patch looks good to me. I'm
unsure if that answers the question, or if some kind of classification
of the files would also be wanted (ignored, untracked, conflicted,
tracked-with-unstaged-changes, tracked-wtih-no-changes,
tracked-with-staged-changes). Maybe the answer is we start with this
and wait for user feedback and only add more if there's demand, but if
so it might be nice to state as much in the commit message.
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v2 5/8] sparse-checkout: add --verbose option to 'clean'
2025-08-05 22:22 ` Elijah Newren
@ 2025-09-11 14:06 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-09-11 14:06 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 8/5/25 6:22 PM, Elijah Newren wrote:
> On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Derrick Stolee <stolee@gmail.com>
>>
>> The 'git sparse-checkout clean' subcommand is focused on directories,
>> deleting any tracked sparse directories to clean up the worktree and
>> make the sparse index feature work optimally.
>>
>> However, this directory-focused approach can leave users wondering why
>> those directories exist at all. In my experience, these files are left
>> over due to ignore or exclude patterns, Windows file handles, or
>> possibly merge conflict resolutions.
>
> Seems reasonable. And based on your previous testcases, it might not
> even be merge conflict resolutions, but just someone placing a
> (possibly-modified) copy of a tracked file back into the directory.
>
> (I've seen folks do that, so it's not "just" your testcase doing
> something unusual.)
>
>> Add a new '--verbose' option for users to see all the files that are
>> being deleted (with '--force') or would be deleted (with '--dry-run').
>
> Does that answer the users' question? You said above in your
> experience it came from a few different reasons; will users want to
> know which reason(s) for which files, or will they only want to know
> the files that are present?
I've had users asking for answers to both of these questions:
1. What files will this remove? (I want to make sure it won't remove
something I care about.)
2. What files were causing my sparse index to expand? (I want to
understand how my workflow impacts this behavior.)
> You stated in the commit message that "users wonder...why those
> directories exist at all." Presuming that listing files is sufficient
> to answer those users questions, this patch looks good to me. I'm
> unsure if that answers the question, or if some kind of classification
> of the files would also be wanted (ignored, untracked, conflicted,
> tracked-with-unstaged-changes, tracked-wtih-no-changes,
> tracked-with-staged-changes). Maybe the answer is we start with this
> and wait for user feedback and only add more if there's demand, but if
> so it might be nice to state as much in the commit message.
Sounds like a plan. This verbose output is intended to be human-
readable and can easily be expanded in the future.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v2 6/8] sparse-index: point users to new 'clean' action
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (4 preceding siblings ...)
2025-07-17 1:34 ` [PATCH v2 5/8] sparse-checkout: add --verbose option to 'clean' Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-07-17 1:34 ` [PATCH v2 7/8] t: expand tests around sparse merges and clean Derrick Stolee via GitGitGadget
` (3 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
In my experience, the most-common reason that the sparse index must
expand to a full one is because there is some leftover file in a tracked
directory that is now outside of the sparse-checkout. The new 'git
sparse-checkout clean' command will find and delete these directories,
so point users to it when they hit the sparse index expansion advice.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
sparse-index.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sparse-index.c b/sparse-index.c
index ff33b8516b9f..cbf2bf618c37 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -31,7 +31,8 @@ int give_advice_on_expansion = 1;
"Your working directory likely has contents that are outside of\n" \
"your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
"see your sparse-checkout definition and compare it to your working\n" \
- "directory contents. Running 'git clean' may assist in this cleanup."
+ "directory contents. Running 'git sparse-checkout clean' may assist\n" \
+ "in this cleanup."
struct modify_index_context {
struct index_state *write;
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* [PATCH v2 7/8] t: expand tests around sparse merges and clean
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (5 preceding siblings ...)
2025-07-17 1:34 ` [PATCH v2 6/8] sparse-index: point users to new 'clean' action Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-07-17 1:34 ` [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files Derrick Stolee via GitGitGadget
` (2 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
With the current implementation of 'git sparse-checkout clean', we
notice that a file that was in a conflicted state does not get cleaned
up because of some internal details around the SKIP_WORKTREE bit.
This test is documenting the current behavior before we update it in the
following change.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
t/t1091-sparse-checkout-builtin.sh | 56 ++++++++++++++++++------------
1 file changed, 34 insertions(+), 22 deletions(-)
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 9a89b902c3f5..116ad7c9a20e 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1128,35 +1128,47 @@ test_expect_success 'clean with staged sparse change' '
test_path_exists repo/folder2
'
-test_expect_success 'clean with merge conflict status' '
- git clone repo clean-merge &&
+test_expect_success 'sparse-checkout operations with merge conflicts' '
+ git clone repo merge &&
- echo dirty >clean-merge/deep/deeper2/a &&
- touch clean-merge/folder2/extra &&
+ (
+ cd merge &&
+ mkdir -p folder1/even/more/dirs &&
+ echo base >folder1/even/more/dirs/file &&
+ git add folder1 &&
+ git commit -m "base" &&
- cat >input <<-EOF &&
- 0 $ZERO_OID folder1/a
- 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a
- EOF
- git -C clean-merge update-index --index-info <input &&
+ git checkout -b right&&
+ echo right >folder1/even/more/dirs/file &&
+ git commit -a -m "right" &&
- git -C clean-merge sparse-checkout set deep/deeper1 &&
+ git checkout -b left HEAD~1 &&
+ echo left >folder1/even/more/dirs/file &&
+ git commit -a -m "left" &&
- test_must_fail git -C clean-merge sparse-checkout clean -f 2>err &&
- grep "failed to convert index to a sparse index" err &&
+ git checkout -b merge &&
+ git sparse-checkout set deep/deeper1 &&
- mkdir -p clean-merge/folder1/ &&
- echo merged >clean-merge/folder1/a &&
- git -C clean-merge add --sparse folder1/a &&
+ test_must_fail git merge -m "will-conflict" right &&
- # deletes folder2/ but leaves staged change in folder1
- # and dirty change in deep/deeper2/
- cat >expect <<-\EOF &&
- Removing folder2/
- EOF
+ test_must_fail git sparse-checkout clean -f 2>err &&
+ grep "failed to convert index to a sparse index" err &&
- git -C clean-merge sparse-checkout clean -f >out &&
- test_cmp expect out
+ echo merged >folder1/even/more/dirs/file &&
+ git add --sparse folder1 &&
+ git merge --continue &&
+
+ test_path_exists folder1/even/more/dirs/file &&
+
+ # clean does not remove the file, because the
+ # SKIP_WORKTREE bit was not cleared by the merge command.
+ git sparse-checkout clean -f >out &&
+ test_line_count = 0 out &&
+ test_path_exists folder1/even/more/dirs/file &&
+
+ git sparse-checkout reapply &&
+ test_path_is_missing folder1
+ )
'
test_done
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (6 preceding siblings ...)
2025-07-17 1:34 ` [PATCH v2 7/8] t: expand tests around sparse merges and clean Derrick Stolee via GitGitGadget
@ 2025-07-17 1:34 ` Derrick Stolee via GitGitGadget
2025-08-06 0:21 ` Elijah Newren
2025-08-28 23:22 ` [PATCH v2 0/8] sparse-checkout: add 'clean' command Junio C Hamano
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-07-17 1:34 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The 'git sparse-checkout clean' command is designed to be a one-command
way to get the worktree in a state such that a sparse index would
operate efficiently. The previous change demonstrated that files outside
the sparse-checkout that were committed due to a merge conflict would
persist despite attempts to run 'git sparse-checkout clean' and instead
a 'git sparse-checkout reapply' would be required.
Instead of requiring users to run both commands, update 'clean' to be
more ruthless about tracked sparse directories. The key here is to make
sure that the SKIP_WORKTREE bit is removed from more paths in the index
using update_sparsity() before compressing the index to a sparse one
in-memory.
The tricky part here is that update_sparsity() was previously assuming
that it would be in 'update' mode and would change the worktree as it
made changes. However, we do not want to make these worktree changes at
this point, instead relying on our later logic (that integrates with
--dry-run and --verbose options) to perform those steps.
One side-effect here is that we also clear out staged files that exist
in the worktree, but they would also appear in the verbose output as
part of the dry run.
The final test in t1091 demonstrates that we no longer need the
'reapply' subcommand for merge resolutions. It also fixes an earlier
case where 'git add --sparse' clears the SKIP_WORKTREE bit and avoids a
directory deletion.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/sparse-checkout.c | 8 ++++++++
t/t1091-sparse-checkout-builtin.sh | 24 +++++++++++++++++-------
unpack-trees.c | 2 +-
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index f38a0809c098..1d1d5208a3ba 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -962,6 +962,7 @@ static int sparse_checkout_clean(int argc, const char **argv,
size_t worktree_len;
int force = 0, dry_run = 0, verbose = 0;
int require_force = 1;
+ struct unpack_trees_options o = { 0 };
struct option builtin_sparse_checkout_clean_options[] = {
OPT__DRY_RUN(&dry_run, N_("dry run")),
@@ -990,6 +991,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
if (repo_read_index(repo) < 0)
die(_("failed to read index"));
+ o.verbose_update = verbose;
+ o.update = 0; /* skip modifying the worktree here. */
+ o.head_idx = -1;
+ o.src_index = o.dst_index = repo->index;
+ if (update_sparsity(&o, NULL))
+ warning(_("failed to reapply sparse-checkout patterns"));
+
if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
repo->index->sparse_index == INDEX_EXPANDED)
die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 116ad7c9a20e..4b9078d90a61 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1104,6 +1104,7 @@ test_expect_success 'clean with staged sparse change' '
cat >expect <<-\EOF &&
Would remove deep/deeper2/
+ Would remove folder1/
EOF
git -C repo sparse-checkout clean --dry-run >out &&
@@ -1115,6 +1116,7 @@ test_expect_success 'clean with staged sparse change' '
# deletes deep/deeper2/ but leaves folder1/ and folder2/
cat >expect <<-\EOF &&
Removing deep/deeper2/
+ Removing folder1/
EOF
# The previous test case checked the -f option, so
@@ -1124,7 +1126,7 @@ test_expect_success 'clean with staged sparse change' '
test_cmp expect out &&
test_path_is_missing repo/deep/deeper2 &&
- test_path_exists repo/folder1 &&
+ test_path_is_missing repo/folder1 &&
test_path_exists repo/folder2
'
@@ -1147,7 +1149,11 @@ test_expect_success 'sparse-checkout operations with merge conflicts' '
git commit -a -m "left" &&
git checkout -b merge &&
- git sparse-checkout set deep/deeper1 &&
+
+ touch deep/deeper2/extra &&
+ git sparse-checkout set deep/deeper1 2>err &&
+ grep "contains untracked files" err &&
+ test_path_exists deep/deeper2/extra &&
test_must_fail git merge -m "will-conflict" right &&
@@ -1159,15 +1165,19 @@ test_expect_success 'sparse-checkout operations with merge conflicts' '
git merge --continue &&
test_path_exists folder1/even/more/dirs/file &&
+ test_path_exists deep/deeper2/extra &&
+
+ cat >expect <<-\EOF &&
+ Removing deep/deeper2/
+ Removing folder1/
+ EOF
# clean does not remove the file, because the
# SKIP_WORKTREE bit was not cleared by the merge command.
git sparse-checkout clean -f >out &&
- test_line_count = 0 out &&
- test_path_exists folder1/even/more/dirs/file &&
-
- git sparse-checkout reapply &&
- test_path_is_missing folder1
+ test_cmp expect out &&
+ test_path_is_missing folder1 &&
+ test_path_is_missing deep/deeper2
)
'
diff --git a/unpack-trees.c b/unpack-trees.c
index 0e9813bddf04..b8814af1b07c 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -2138,7 +2138,7 @@ enum update_sparsity_result update_sparsity(struct unpack_trees_options *o,
index_state_init(&o->internal.result, o->src_index->repo);
/* Sanity checks */
- if (!o->update || o->index_only || o->skip_sparse_checkout)
+ if (o->index_only || o->skip_sparse_checkout)
BUG("update_sparsity() is for reflecting sparsity patterns in working directory");
if (o->src_index != o->dst_index || o->fn)
BUG("update_sparsity() called wrong");
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files
2025-07-17 1:34 ` [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files Derrick Stolee via GitGitGadget
@ 2025-08-06 0:21 ` Elijah Newren
2025-09-11 15:26 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-08-06 0:21 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> The 'git sparse-checkout clean' command is designed to be a one-command
> way to get the worktree in a state such that a sparse index would
> operate efficiently. The previous change demonstrated that files outside
> the sparse-checkout that were committed due to a merge conflict would
> persist despite attempts to run 'git sparse-checkout clean' and instead
> a 'git sparse-checkout reapply' would be required.
>
> Instead of requiring users to run both commands, update 'clean' to be
> more ruthless about tracked sparse directories. The key here is to make
> sure that the SKIP_WORKTREE bit is removed from more paths in the index
> using update_sparsity() before compressing the index to a sparse one
> in-memory.
>
> The tricky part here is that update_sparsity() was previously assuming
> that it would be in 'update' mode and would change the worktree as it
> made changes. However, we do not want to make these worktree changes at
> this point, instead relying on our later logic (that integrates with
> --dry-run and --verbose options) to perform those steps.
>
> One side-effect here is that we also clear out staged files that exist
> in the worktree, but they would also appear in the verbose output as
> part of the dry run.
>
> The final test in t1091 demonstrates that we no longer need the
> 'reapply' subcommand for merge resolutions. It also fixes an earlier
> case where 'git add --sparse' clears the SKIP_WORKTREE bit and avoids a
> directory deletion.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> builtin/sparse-checkout.c | 8 ++++++++
> t/t1091-sparse-checkout-builtin.sh | 24 +++++++++++++++++-------
> unpack-trees.c | 2 +-
> 3 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index f38a0809c098..1d1d5208a3ba 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -962,6 +962,7 @@ static int sparse_checkout_clean(int argc, const char **argv,
> size_t worktree_len;
> int force = 0, dry_run = 0, verbose = 0;
> int require_force = 1;
> + struct unpack_trees_options o = { 0 };
>
> struct option builtin_sparse_checkout_clean_options[] = {
> OPT__DRY_RUN(&dry_run, N_("dry run")),
> @@ -990,6 +991,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
> if (repo_read_index(repo) < 0)
> die(_("failed to read index"));
>
> + o.verbose_update = verbose;
> + o.update = 0; /* skip modifying the worktree here. */
> + o.head_idx = -1;
> + o.src_index = o.dst_index = repo->index;
> + if (update_sparsity(&o, NULL))
> + warning(_("failed to reapply sparse-checkout patterns"));
> +
> if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
> repo->index->sparse_index == INDEX_EXPANDED)
> die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index 116ad7c9a20e..4b9078d90a61 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -1104,6 +1104,7 @@ test_expect_success 'clean with staged sparse change' '
>
> cat >expect <<-\EOF &&
> Would remove deep/deeper2/
> + Would remove folder1/
> EOF
>
> git -C repo sparse-checkout clean --dry-run >out &&
> @@ -1115,6 +1116,7 @@ test_expect_success 'clean with staged sparse change' '
> # deletes deep/deeper2/ but leaves folder1/ and folder2/
> cat >expect <<-\EOF &&
> Removing deep/deeper2/
> + Removing folder1/
> EOF
>
> # The previous test case checked the -f option, so
> @@ -1124,7 +1126,7 @@ test_expect_success 'clean with staged sparse change' '
> test_cmp expect out &&
>
> test_path_is_missing repo/deep/deeper2 &&
> - test_path_exists repo/folder1 &&
> + test_path_is_missing repo/folder1 &&
> test_path_exists repo/folder2
What this doesn't show is that afterwards:
$ git -C repo status
On branch main
You are in a sparse checkout with 78% of tracked files present.
Changes to be committed:
(use "git restore --staged <file>..." to unstage)
new file: folder1/file
Changes not staged for commit:
(use "git add/rm <file>..." to update what will be committed)
(use "git restore <file>..." to discard changes in working directory)
deleted: folder1/file
modified: folder2/a
==> In other words, folder1/file and folder1/ were successfully
removed by this clean command, but when the user runs status, they see
that folder1/file has been deleted (and has a staged change). This is
probably correct behavior and may be expected, but might be surprising
to users at first; it probably deserves to be documented, or at least
covered in the commit message. Especially since it's somewhat
difficult for users to work with:
$ cd repo
$ git checkout HEAD -- folder1/file
error: pathspec 'folder1/file' did not match any file(s) known to git
$ git add -- folder1/file
The following paths and/or pathspecs matched paths that exist
outside of your sparse-checkout definition, so will not be
updated in the index:
folder1/file
hint: If you intend to update such entries, try one of the following:
hint: * Use the --sparse option.
hint: * Disable or modify the sparsity rules.
hint: Disable this message with "git config set advice.updateSparsePath false"
$ git add --sparse -- folder1/file
fatal: pathspec 'folder1/file' did not match any files
$ git reset -- folder1/file
Unstaged changes after reset:
M folder2/a
$
==> So, both checkout and add fail to work with this path while reset
works. `git commit` also would have worked; let's back up to the
point of status above, then get rid of the folder2/a modification, and
then use git commit to commit our folder1/file change:
$ git checkout folder2/a
Updated 1 path from the index
$ git commit -m "A change"
[main aad2d89] A change
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 folder1/file
$ git status
On branch main
You are in a sparse checkout with 78% of tracked files present.
Changes not staged for commit:
(use "git add/rm <file>..." to update what will be committed)
(use "git restore <file>..." to discard changes in working directory)
deleted: folder1/file
no changes added to commit (use "git add" and/or "git commit -a")
==> Great, so we got rid of the folder2/a modification, and committed
the staged folder1/file modification. The folder1/file showing as
deleted is annoying but a `sparse-checkout clean` should get rid of it
as well as the pesky folder2/ directory, right?
$ git sparse-checkout clean -f
Removing folder2/
$ git status
On branch main
You are in a sparse checkout with 78% of tracked files present.
Changes not staged for commit:
(use "git add/rm <file>..." to update what will be committed)
(use "git restore <file>..." to discard changes in working directory)
deleted: folder1/file
deleted: folder2/a
no changes added to commit (use "git add" and/or "git commit -a")
==> Huh, `git sparse-checkout clean -f` got rid of folder2, but now
folder2/a shows up as deleted locally...and folder1/file is still
showing as deleted locally. And it doesn't matter how many times we
run `git sparse-checkout clean -f`; this is the end state for it. But
if we run `git sparse-checkout reapply`:
$ git sparse-checkout reapply
$ git status
On branch main
You are in a sparse checkout with 56% of tracked files present.
nothing to commit, working tree clean
==> So, you still need to use `git sparse-checkout reapply` together
with `git sparse-checkout clean -f`; you haven't fixed that yet.
> @@ -1147,7 +1149,11 @@ test_expect_success 'sparse-checkout operations with merge conflicts' '
> git commit -a -m "left" &&
>
> git checkout -b merge &&
> - git sparse-checkout set deep/deeper1 &&
> +
> + touch deep/deeper2/extra &&
> + git sparse-checkout set deep/deeper1 2>err &&
> + grep "contains untracked files" err &&
> + test_path_exists deep/deeper2/extra &&
>
> test_must_fail git merge -m "will-conflict" right &&
>
> @@ -1159,15 +1165,19 @@ test_expect_success 'sparse-checkout operations with merge conflicts' '
> git merge --continue &&
>
> test_path_exists folder1/even/more/dirs/file &&
> + test_path_exists deep/deeper2/extra &&
> +
> + cat >expect <<-\EOF &&
> + Removing deep/deeper2/
> + Removing folder1/
> + EOF
>
> # clean does not remove the file, because the
> # SKIP_WORKTREE bit was not cleared by the merge command.
Shouldn't the comment be updated, given the testcase updates?
> git sparse-checkout clean -f >out &&
> - test_line_count = 0 out &&
> - test_path_exists folder1/even/more/dirs/file &&
> -
> - git sparse-checkout reapply &&
> - test_path_is_missing folder1
> + test_cmp expect out &&
> + test_path_is_missing folder1 &&
> + test_path_is_missing deep/deeper2
Yes, but why does `git status` show folder1/even/more/dirs/file as
being locally deleted? Does the code forget to update the
SKIP_WORKTREE status after clearing out the files?
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files
2025-08-06 0:21 ` Elijah Newren
@ 2025-09-11 15:26 ` Derrick Stolee
2025-09-11 16:21 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee @ 2025-09-11 15:26 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 8/5/25 8:21 PM, Elijah Newren wrote:
> On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
>> test_path_is_missing repo/deep/deeper2 &&
>> - test_path_exists repo/folder1 &&
>> + test_path_is_missing repo/folder1 &&
>> test_path_exists repo/folder2
>
> What this doesn't show is that afterwards:
>
> $ git -C repo status
>
> On branch main
> You are in a sparse checkout with 78% of tracked files present.
>
> Changes to be committed:
> (use "git restore --staged <file>..." to unstage)
> new file: folder1/file
>
> Changes not staged for commit:
> (use "git add/rm <file>..." to update what will be committed)
> (use "git restore <file>..." to discard changes in working directory)
> deleted: folder1/file
> modified: folder2/a
You make an excellent point about SKIP_WORKTREE bit states across
changes like this. I'll expand on this flow in the test script.
>> + test_path_exists deep/deeper2/extra &&
>> +
>> + cat >expect <<-\EOF &&
>> + Removing deep/deeper2/
>> + Removing folder1/
>> + EOF
>>
>> # clean does not remove the file, because the
>> # SKIP_WORKTREE bit was not cleared by the merge command.
>
> Shouldn't the comment be updated, given the testcase updates?
Yes. Good catch.
>> git sparse-checkout clean -f >out &&
>> - test_line_count = 0 out &&
>> - test_path_exists folder1/even/more/dirs/file &&
>> -
>> - git sparse-checkout reapply &&
>> - test_path_is_missing folder1
>> + test_cmp expect out &&
>> + test_path_is_missing folder1 &&
>> + test_path_is_missing deep/deeper2
>
> Yes, but why does `git status` show folder1/even/more/dirs/file as
> being locally deleted? Does the code forget to update the
> SKIP_WORKTREE status after clearing out the files?
This is an interesting quirk about how "git add --sparse <path>"
works, which is to remove the SKIP_WORKTREE bit. It's further
interesting that the file is still being "tracked as a deletion"
while also being "collapsed to a sparse directory".
By expanding the earlier test cases to include these post-clean
statuses, we can see that this change is causing this new view
of a deleted file. I will see what I can do to determine exactly
what's different this time and how we can minimize that
strangeness (or: consider dropping this patch).
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files
2025-09-11 15:26 ` Derrick Stolee
@ 2025-09-11 16:21 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-09-11 16:21 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 9/11/25 11:26 AM, Derrick Stolee wrote:
> On 8/5/25 8:21 PM, Elijah Newren wrote:
>> On Wed, Jul 16, 2025 at 6:34 PM Derrick Stolee via GitGitGadget
>>> git sparse-checkout clean -f >out &&
>>> - test_line_count = 0 out &&
>>> - test_path_exists folder1/even/more/dirs/file &&
>>> -
>>> - git sparse-checkout reapply &&
>>> - test_path_is_missing folder1
>>> + test_cmp expect out &&
>>> + test_path_is_missing folder1 &&
>>> + test_path_is_missing deep/deeper2
>>
>> Yes, but why does `git status` show folder1/even/more/dirs/file as
>> being locally deleted? Does the code forget to update the
>> SKIP_WORKTREE status after clearing out the files?
>
> This is an interesting quirk about how "git add --sparse <path>"
> works, which is to remove the SKIP_WORKTREE bit. It's further
> interesting that the file is still being "tracked as a deletion"
> while also being "collapsed to a sparse directory".
>
> By expanding the earlier test cases to include these post-clean
> statuses, we can see that this change is causing this new view
> of a deleted file. I will see what I can do to determine exactly
> what's different this time and how we can minimize that
> strangeness (or: consider dropping this patch).
I'm coming to the conclusion that I should drop this patch
for now and consider this more aggressive cleaning mechanism
for a later update.
The strangest part of what's going on here is that the in-
memory sparse index collapses the folder2/ entry but an on-
disk sparse index does not in this state. That's what's
leading to this deleted entry.
I think the unpack_trees_options values are involved, but a
few experiments led to no change in my tests.
(With that, I have a v3 nearly ready, but I'll wait a day to
see if feedback on top of my comments causes me to rethink
anything.)
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v2 0/8] sparse-checkout: add 'clean' command
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (7 preceding siblings ...)
2025-07-17 1:34 ` [PATCH v2 8/8] sparse-checkout: make 'clean' clear more files Derrick Stolee via GitGitGadget
@ 2025-08-28 23:22 ` Junio C Hamano
2025-08-29 0:15 ` Elijah Newren
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
9 siblings, 1 reply; 69+ messages in thread
From: Junio C Hamano @ 2025-08-28 23:22 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, newren, Patrick Steinhardt, Derrick Stolee
"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
> This command uses the same '--force' and '--dry-run' options as 'git clean',
> with integrations with the 'clean.requireForce' config option. There are
> some concerns that this isn't an obvious way to work with the 'git clean'
> command, but I thought we should be consistent here. I did change the error
> message to point users to the necessary options.
>
> This option would be preferred to something like 'git clean -dfx' since it
> does not clear the excluded files that are still within the sparse-checkout.
> Instead, it performs the exact filesystem operations required to refresh the
> sparse index performance back to what is expected.
>
> I spent a few weeks debating with myself about whether or not this was the
> right interface, so please suggest alternatives if you have better ideas.
> Among my rejected ideas include:
>
> * 'git sparse-checkout reapply -f -x' or similar augmentations of
> 'reapply'.
> * 'git clean --sparse' to focus the clean operation on things outside of
> the sparse-checkout.
>
>
> Updates in V2
> =============
>
> * This series is based on 2c5b5565981 (environment: remove the global
> variable 'sparse_expect_files_outside_of_patterns', 2025-07-01) to build
> upon those cleanups in builtin/sparse-checkout.c.
> * The --force and --dry-run options match 'git clean'.
> * A --verbose option is added. It does not link to the index for
> tracked/untracked/ignored/excluded or clean/modified/staged/conflicted
> status, but instead gives the full list for information.
> * To support the --verbose option, a new for_each_file_in_dir() method is
> added to dir.h.
> * Tests are added to demonstrate the behavior when a sparse directory has a
> merge conflict (fails with an explanation). When adding the test based on
> the previous version's functionality, I realized that the behavior is
> sometimes less effective than git sparse-checkout reapply even after a
> sparse file is committed. To demonstrate this change, the full test is
> created on its own and then a code change is added with the impact on the
> test.
This seems to have a few comments that haven't been responded to
(plus a "This step looks good to me" or two). Can we get it unstuck
soonish? The topic is from mid July and I do not like to hold topics
in 'seen' for longer than a month without any activity.
Thanks.
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v2 0/8] sparse-checkout: add 'clean' command
2025-08-28 23:22 ` [PATCH v2 0/8] sparse-checkout: add 'clean' command Junio C Hamano
@ 2025-08-29 0:15 ` Elijah Newren
2025-08-29 0:27 ` Junio C Hamano
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-08-29 0:15 UTC (permalink / raw)
To: Junio C Hamano
Cc: Derrick Stolee via GitGitGadget, git, Patrick Steinhardt,
Derrick Stolee
On Thu, Aug 28, 2025 at 4:22 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> This seems to have a few comments that haven't been responded to
> (plus a "This step looks good to me" or two). Can we get it unstuck
> soonish? The topic is from mid July and I do not like to hold topics
> in 'seen' for longer than a month without any activity.
Stolee built this series on top of Ayush's topic to avoid conflicts
for you, and he said
(https://lore.kernel.org/git/c3c0fbef-f395-4972-8352-dd89af6799d5@gmail.com/)
that since you marked this as blocking on Ayush's topic, he didn't
want to update until that topic moved.
Do you want to instead kick Ayush's topic out and have Stolee rebase
to no longer be on top of Ayush's, and have Ayush rebase anything he
might do on top of Stolee's work? (See also Ayush's recent update at
https://lore.kernel.org/git/CAE7as+ZpEwiNsDAozoZXqHRLOF3+hT++uo=mzZqEvTPovQN9uw@mail.gmail.com/)
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v2 0/8] sparse-checkout: add 'clean' command
2025-08-29 0:15 ` Elijah Newren
@ 2025-08-29 0:27 ` Junio C Hamano
2025-08-29 21:03 ` Junio C Hamano
0 siblings, 1 reply; 69+ messages in thread
From: Junio C Hamano @ 2025-08-29 0:27 UTC (permalink / raw)
To: Elijah Newren
Cc: Derrick Stolee via GitGitGadget, git, Patrick Steinhardt,
Derrick Stolee
Elijah Newren <newren@gmail.com> writes:
> On Thu, Aug 28, 2025 at 4:22 PM Junio C Hamano <gitster@pobox.com> wrote:
>>
>> This seems to have a few comments that haven't been responded to
>> (plus a "This step looks good to me" or two). Can we get it unstuck
>> soonish? The topic is from mid July and I do not like to hold topics
>> in 'seen' for longer than a month without any activity.
>
> Stolee built this series on top of Ayush's topic to avoid conflicts
> for you, and he said
> (https://lore.kernel.org/git/c3c0fbef-f395-4972-8352-dd89af6799d5@gmail.com/)
> that since you marked this as blocking on Ayush's topic, he didn't
> want to update until that topic moved.
>
> Do you want to instead kick Ayush's topic out and have Stolee rebase
> to no longer be on top of Ayush's, and have Ayush rebase anything he
> might do on top of Stolee's work? (See also Ayush's recent update at
> https://lore.kernel.org/git/CAE7as+ZpEwiNsDAozoZXqHRLOF3+hT++uo=mzZqEvTPovQN9uw@mail.gmail.com/)
It really depends on how unstable the base topic would be, but I
know Stolee is better than building his stuff on unusably unstable
crap, and that was the reason why I thought that updating this topic
on top of the same base would allow us to move forward faster, as it
would mean that everything would hopefully be ready _UNLESS_ the
change that needs to be made to the base topic is so extensive that
the topic on top would also need heavy updates _again_ once an
update to the base topic comes.
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v2 0/8] sparse-checkout: add 'clean' command
2025-08-29 0:27 ` Junio C Hamano
@ 2025-08-29 21:03 ` Junio C Hamano
2025-08-30 13:41 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Junio C Hamano @ 2025-08-29 21:03 UTC (permalink / raw)
To: Elijah Newren
Cc: Derrick Stolee via GitGitGadget, git, Patrick Steinhardt,
Derrick Stolee
Junio C Hamano <gitster@pobox.com> writes:
> Elijah Newren <newren@gmail.com> writes:
>
>> On Thu, Aug 28, 2025 at 4:22 PM Junio C Hamano <gitster@pobox.com> wrote:
>>>
>>> This seems to have a few comments that haven't been responded to
>>> (plus a "This step looks good to me" or two). Can we get it unstuck
>>> soonish? The topic is from mid July and I do not like to hold topics
>>> in 'seen' for longer than a month without any activity.
>>
>> Stolee built this series on top of Ayush's topic to avoid conflicts
>> for you, and he said
>> (https://lore.kernel.org/git/c3c0fbef-f395-4972-8352-dd89af6799d5@gmail.com/)
>> that since you marked this as blocking on Ayush's topic, he didn't
>> want to update until that topic moved.
>>
>> Do you want to instead kick Ayush's topic out and have Stolee rebase
>> to no longer be on top of Ayush's, and have Ayush rebase anything he
>> might do on top of Stolee's work? (See also Ayush's recent update at
>> https://lore.kernel.org/git/CAE7as+ZpEwiNsDAozoZXqHRLOF3+hT++uo=mzZqEvTPovQN9uw@mail.gmail.com/)
>
> It really depends on how unstable the base topic would be, but I
> know Stolee is better than building his stuff on unusably unstable
> crap, and that was the reason why I thought that updating this topic
> on top of the same base would allow us to move forward faster, as it
> would mean that everything would hopefully be ready _UNLESS_ the
> change that needs to be made to the base topic is so extensive that
> the topic on top would also need heavy updates _again_ once an
> update to the base topic comes.
(Sorry, but sent before finishing).
Yes, it may be simpler to kick out a stalled topic and give it a
fresh restart when it becomes ready. If Derrick wants to go that
route, I am totally fine with that.
Thanks.
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v2 0/8] sparse-checkout: add 'clean' command
2025-08-29 21:03 ` Junio C Hamano
@ 2025-08-30 13:41 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-08-30 13:41 UTC (permalink / raw)
To: Junio C Hamano, Elijah Newren
Cc: Derrick Stolee via GitGitGadget, git, Patrick Steinhardt,
ayu.chandekar
On 8/29/25 5:03 PM, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
>> Elijah Newren <newren@gmail.com> writes:
>>
>>> On Thu, Aug 28, 2025 at 4:22 PM Junio C Hamano <gitster@pobox.com> wrote:
>>>>
>>>> This seems to have a few comments that haven't been responded to
>>>> (plus a "This step looks good to me" or two). Can we get it unstuck
>>>> soonish? The topic is from mid July and I do not like to hold topics
>>>> in 'seen' for longer than a month without any activity.
>>>
>>> Stolee built this series on top of Ayush's topic to avoid conflicts
>>> for you, and he said
>>> (https://lore.kernel.org/git/c3c0fbef-f395-4972-8352-dd89af6799d5@gmail.com/)
>>> that since you marked this as blocking on Ayush's topic, he didn't
>>> want to update until that topic moved.
>>>
>>> Do you want to instead kick Ayush's topic out and have Stolee rebase
>>> to no longer be on top of Ayush's, and have Ayush rebase anything he
>>> might do on top of Stolee's work? (See also Ayush's recent update at
>>> https://lore.kernel.org/git/CAE7as+ZpEwiNsDAozoZXqHRLOF3+hT++uo=mzZqEvTPovQN9uw@mail.gmail.com/)
>>
>> It really depends on how unstable the base topic would be, but I
>> know Stolee is better than building his stuff on unusably unstable
>> crap, and that was the reason why I thought that updating this topic
>> on top of the same base would allow us to move forward faster, as it
>> would mean that everything would hopefully be ready _UNLESS_ the
>> change that needs to be made to the base topic is so extensive that
>> the topic on top would also need heavy updates _again_ once an
>> update to the base topic comes.
>
> (Sorry, but sent before finishing).
>
> Yes, it may be simpler to kick out a stalled topic and give it a
> fresh restart when it becomes ready. If Derrick wants to go that
> route, I am totally fine with that.
I'll see how things go this coming week as I page this series
back into my active work. If Ayush resubmits before I get to it,
then I won't be upset.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v3 0/7] sparse-checkout: add 'clean' command
2025-07-17 1:34 ` [PATCH v2 0/8] " Derrick Stolee via GitGitGadget
` (8 preceding siblings ...)
2025-08-28 23:22 ` [PATCH v2 0/8] sparse-checkout: add 'clean' command Junio C Hamano
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 1/7] sparse-checkout: remove use of the_repository Derrick Stolee via GitGitGadget
` (9 more replies)
9 siblings, 10 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee
NEW: This series is rebased on a recent master to remove dependence on the
updates to the global variables used by the sparse-checkout system.
When using cone-mode sparse-checkout, users specify which tracked
directories they want (recursively) and any directory not part of the parent
paths for those directories are considered "out of scope". When changing
sparse-checkouts, there are a variety of reasons why these "out of scope"
directories could remain, including:
* The user has .gitignore or .git/info/exclude files that tell Git to not
remove files of a certain type.
* Some filesystem blocker prevented the removal of a tracked file. This is
usually more of an issue on Windows where a read handle will block file
deletion.
Typically, this would not mean too much for the user experience. A few extra
filesystem checks might be required to satisfy git status commands, but the
scope of the performance hit is relative to how many cruft files are left
over in this situation.
However, when using the sparse index, these tracked sparse directories cause
significant performance issues. When noticing that the index contains a
sparse directory but that directory exists on disk, Git needs to expand that
sparse directory to determine which files are tracked or untracked. The
current mechanism expands the entire index to a full one, an expensive
operation that scales with the total number of paths at HEAD and not just
the number of cruft files left over.
Advice was added in 9479a31d603 (advice: warn when sparse index expands,
2024-07-08) to help users determine that they were in this state. However,
the advice doesn't actually recommend helpful ways to get out of this state.
Recommending "git clean" on its own is incomplete, as typically users
actually need 'git clean -dfx' to clear out the ignored or excluded files.
Even then, they may need 'git sparse-checkout reapply' afterwards to clear
the sparse directories.
The advice was successful in helping to alert users to the problem, which is
how I got wind of many of these cases for how users get into this state.
It's now time to give them a tool that helps them out of this state.
This series adds a new 'git sparse-checkout clean' command that currently
only works for cone-mode sparse-checkouts. The only thing it does is
collapse the index to a sparse index (as much as possible) and make sure
that any sparse directories are removed. These directories are listed to
stdout.
This command uses the same '--force' and '--dry-run' options as 'git clean',
with integrations with the 'clean.requireForce' config option. There are
some concerns that this isn't an obvious way to work with the 'git clean'
command, but I thought we should be consistent here. I did change the error
message to point users to the necessary options.
This option would be preferred to something like 'git clean -dfx' since it
does not clear the excluded files that are still within the sparse-checkout.
Instead, it performs the exact filesystem operations required to refresh the
sparse index performance back to what is expected.
Updates in V3
=============
Huge thanks to Elijah for such a detailed review. Apologies for the delay in
responding.
* Removed dependency on stalled series around updating the sparse-checkout
globals.
* Commit message and documentation is updated to better describe the
conditions that qualify a file or directory for removal.
* Tests are expanded significantly to include special cases and
aftereffects.
* A note is added around possible future expansion of the --verbose option
to include more detailed status information on the files that would be
deleted.
* Due to a situation where a file appears as "modified and deleted" after
the more aggressive updating of the tree, the previous patch 8 is removed
(for now). I may reconsider and send a version in the future that avoids
this issue. Tests from the earlier patches are more expanded in such a
way that the aggressive implementation requires test changes that reveal
this problem. See [1] for a copy of this change and how it impacts the
latest tests.
[1]
https://github.com/derrickstolee/git/commit/f9eb8e03361d61023fec33386fc9898f60b65a31
Updates in V2
=============
* This series is based on 2c5b5565981 (environment: remove the global
variable 'sparse_expect_files_outside_of_patterns', 2025-07-01) to build
upon those cleanups in builtin/sparse-checkout.c.
* The --force and --dry-run options match 'git clean'.
* A --verbose option is added. It does not link to the index for
tracked/untracked/ignored/excluded or clean/modified/staged/conflicted
status, but instead gives the full list for information.
* To support the --verbose option, a new for_each_file_in_dir() method is
added to dir.h.
* Tests are added to demonstrate the behavior when a sparse directory has a
merge conflict (fails with an explanation). When adding the test based on
the previous version's functionality, I realized that the behavior is
sometimes less effective than git sparse-checkout reapply even after a
sparse file is committed. To demonstrate this change, the full test is
created on its own and then a code change is added with the impact on the
test.
Thanks, -Stolee
Derrick Stolee (7):
sparse-checkout: remove use of the_repository
sparse-checkout: add basics of 'clean' command
sparse-checkout: match some 'clean' behavior
dir: add generic "walk all files" helper
sparse-checkout: add --verbose option to 'clean'
sparse-index: point users to new 'clean' action
t: expand tests around sparse merges and clean
Documentation/git-sparse-checkout.adoc | 33 +++-
builtin/sparse-checkout.c | 218 ++++++++++++++++++-------
dir.c | 28 ++++
dir.h | 14 ++
sparse-index.c | 3 +-
t/t1091-sparse-checkout-builtin.sh | 175 ++++++++++++++++++++
6 files changed, 413 insertions(+), 58 deletions(-)
base-commit: ab427cd991100e94792fce124b0934135abdea4b
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1941%2Fderrickstolee%2Fgit-sparse-checkout-clean-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1941/derrickstolee/git-sparse-checkout-clean-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1941
Range-diff vs v2:
1: 92d0cd41a4 ! 1: 437a57b665 sparse-checkout: remove use of the_repository
@@ builtin/sparse-checkout.c: static enum sparse_checkout_mode update_cone_mode(int
int mode, record_mode;
@@ builtin/sparse-checkout.c: static int update_modes(int *cone_mode, int *sparse_index)
- record_mode = (*cone_mode != -1) || !the_repository->settings.sparse_checkout;
+ record_mode = (*cone_mode != -1) || !core_apply_sparse_checkout;
mode = update_cone_mode(cone_mode);
- if (record_mode && set_config(mode))
@@ builtin/sparse-checkout.c: static void add_patterns_literal(int argc, const char
{
int result;
@@ builtin/sparse-checkout.c: static int modify_pattern_list(struct strvec *args, int use_stdin,
- break;
}
-- if (!the_repository->settings.sparse_checkout) {
+ if (!core_apply_sparse_checkout) {
- set_config(MODE_ALL_PATTERNS);
-- the_repository->settings.sparse_checkout = 1;
-+ if (!repo->settings.sparse_checkout) {
+ set_config(repo, MODE_ALL_PATTERNS);
-+ repo->settings.sparse_checkout = 1;
+ core_apply_sparse_checkout = 1;
changed_config = 1;
}
@@ builtin/sparse-checkout.c: static struct sparse_checkout_add_opts {
static struct option builtin_sparse_checkout_add_options[] = {
OPT_BOOL_F(0, "skip-checks", &add_opts.skip_checks,
@@ builtin/sparse-checkout.c: static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
- if (!the_repository->settings.sparse_checkout)
+ if (!core_apply_sparse_checkout)
die(_("no sparse-checkout to add to"));
- repo_read_index(the_repository);
@@ builtin/sparse-checkout.c: static int sparse_checkout_disable(int argc, const ch
memset(&pl, 0, sizeof(pl));
hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0);
@@ builtin/sparse-checkout.c: static int sparse_checkout_disable(int argc, const char **argv,
-
add_pattern("/*", empty_base, 0, &pl, 0);
+ prepare_repo_settings(the_repository);
- the_repository->settings.sparse_index = 0;
+ repo->settings.sparse_index = 0;
@@ builtin/sparse-checkout.c: static int sparse_checkout_check_rules(int argc, cons
return ret;
@@ builtin/sparse-checkout.c: int cmd_sparse_checkout(int argc,
- git_config(git_default_config, NULL);
+ repo_config(the_repository, git_default_config, NULL);
- prepare_repo_settings(the_repository);
- the_repository->settings.command_requires_full_index = 0;
2: 7e8f7c2d6c ! 2: a1564f74cf sparse-checkout: add basics of 'clean' command
@@ Commit message
not be sufficient.
Add a new subcommand to 'git sparse-checkout' that removes these
- tracked-but-sparse directories. This necessarily removes all files
- contained within, including tracked and untracked files. Of particular
- importance are ignored and excluded files which would normally be
- ignored even by 'git clean -f' unless the '-x' or '-X' option is
- provided. This is the most extreme method for doing this, but it works
- when the sparse-checkout is in cone mode and is expected to rescope
- based on directories, not files.
+ tracked-but-sparse directories.
+
+ The implementation details provide a clear definition of what is happening,
+ but it is difficult to describe this without including the internal
+ implementation details. The core operation converts the index to a sparse
+ index (in memory if not already on disk) and then deletes any directories in
+ the worktree that correspond with a sparse directory entry in that sparse
+ index.
+
+ In the most common case, this means that a file will be removed if it is
+ contained within a directory that is both tracked and outside of the
+ sparse-checkout definition. However, there can be exceptions depending on
+ the current state of the index:
+
+ * If the worktree has a modification to a tracked, sparse file, then that
+ file's parent directories will be expanded instead of represented as
+ sparse directories. Siblings of those parent directories may be
+ considered sparse.
+
+ * If the user staged a sparse file with "git add --sparse", then that file
+ loses the SKIP_WORKTREE bit until the sparse-checkout is reapplied. Until
+ then, that file's parent directories are not represented as sparse
+ directory entries and thus will not be removed. Siblings of those parent
+ directories may be considered sparse. (There may be other reasons why
+ the SKIP_WORKTREE bit was removed for a file and this impact on the
+ sparse directories will apply to those as well.)
+
+ * If the user has a merge conflict outside of the sparse-checkout
+ definition, then those conflict entries prevent the parent directories
+ from being represented as sparse directory entries and thus are not
+ removed.
+
+ * The cases above present reasons why certain _file conditions_ will impact
+ which _directories_ are considered sparse. The list of tracked
+ directories that are outside of the sparse-checkout definition but not
+ represented as a sparse directory further reduces the list of files that
+ will be removed.
+
+ For these complicated reasons, the documentation details a potential list of
+ files that will be "considered for removal" instead of defining the list
+ concretely. The special cases can be handled by resolving conflicts,
+ committing staged changes, and running 'git sparse-checkout reapply' to
+ update the SKIP_WORKTREE bits as expected by the sparse-checkout definition.
+
+ It is important to make clear that this operation will remove ignored and
+ excluded files which would normally be ignored even by 'git clean -f' unless
+ the '-x' or '-X' option is provided. This is the most extreme method for
+ doing this, but it works when the sparse-checkout is in cone mode and is
+ expected to rescope based on directories, not files.
The current implementation always deletes these sparse directories
without warning. This is unacceptable for a released version, but those
features will be added in changes coming immediately after this one.
- Note that untracked directories within the sparse-checkout remain.
- Further, directories that contain staged changes or files in merge
- conflict states are not deleted. This is a detail that is partly hidden
- by the implementation which relies on collapsing the index to a sparse
- index in-memory and only deleting directories that are listed as sparse
- in the index.
+ Note that this will not remove an untracked directory (or any of its
+ contents) if its parent is a tracked directory within the sparse-checkout
+ definition. This is required to prevent removing data created by tools that
+ perform caching operations for editors or build tools.
+
+ Thus, 'git sparse-checkout clean' is both more aggressive and more careful
+ than 'git clean -fx':
- If a staged change exists, then that entry is not stored as a sparse
- tree entry and thus remains on-disk until committed or reset.
+ * It is more aggressive because it will remove _tracked_ files within the
+ sparse directories.
- There are some interesting cases around merge conflict resolution, but
- that will be carefully analyzed in the future.
+ * It is less aggressive because it will leave _untracked_ files that are
+ not contained in sparse directories.
+
+ These special cases will be handled more explicitly in a future change that
+ expands tests for the 'git sparse-checkout clean' command. We handle some of
+ the modified, staged, and committed states including some impact on 'git
+ status' after cleaning.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
@@ Documentation/git-sparse-checkout.adoc: flags, with the same meaning as the flag
all sparsity paths.
+'clean'::
-+ Remove all files in tracked directories that are outside of the
-+ sparse-checkout definition. This subcommand requires cone-mode
-+ sparse-checkout to be sure that we know which directories are
-+ both tracked and all contained paths are not in the sparse-checkout.
-+ This command can be used to be sure the sparse index works
-+ efficiently, though it does not require enabling the sparse index
-+ feature via the `index.sparse=true` configuration.
++ Opportunistically remove files outside of the sparse-checkout
++ definition. This command requires cone mode to use recursive
++ directory matches to determine which files should be removed. A
++ file is considered for removal if it is contained within a tracked
++ directory that is outside of the sparse-checkout definition.
+++
++Some special cases, such as merge conflicts or modified files outside of
++the sparse-checkout definition could lead to keeping files that would
++otherwise be removed. Resolve conflicts, stage modifications, and use
++`git sparse-checkout reapply` in conjunction with `git sparse-checkout
++clean` to resolve these cases.
+++
++This command can be used to be sure the sparse index works efficiently,
++though it does not require enabling the sparse index feature via the
++`index.sparse=true` configuration.
+
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
@@ builtin/sparse-checkout.c: static int sparse_checkout_reapply(int argc, const ch
+ };
+
+ setup_work_tree();
-+ if (!repo->settings.sparse_checkout)
++ if (!core_apply_sparse_checkout)
+ die(_("must be in a sparse-checkout to clean directories"));
-+ if (!repo->settings.sparse_checkout_cone)
++ if (!core_sparse_checkout_cone)
+ die(_("must be in a cone-mode sparse-checkout to clean directories"));
+
+ argc = parse_options(argc, argv, prefix,
@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'check-rules null termin
+test_expect_success 'clean' '
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
++ git -C repo sparse-checkout reapply &&
+ mkdir repo/deep/deeper2 repo/folder1 &&
++
++ # Add untracked files
+ touch repo/deep/deeper2/file &&
+ touch repo/folder1/file &&
+
@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'check-rules null termin
+ test_path_is_missing repo/folder1
+'
+
-+test_expect_success 'clean with staged sparse change' '
++test_expect_success 'clean with sparse file states' '
++ test_when_finished git reset --hard &&
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
-+ mkdir repo/deep/deeper2 repo/folder1 repo/folder2 &&
-+ touch repo/deep/deeper2/file &&
-+ touch repo/folder1/file &&
++ mkdir repo/folder2 &&
++
++ # create an untracked file and a modified file
++ touch repo/folder2/file &&
+ echo dirty >repo/folder2/a &&
+
-+ git -C repo add --sparse folder1/file &&
++ # First clean/reapply pass will do nothing.
++ git -C repo sparse-checkout clean >out &&
++ test_must_be_empty out &&
++ test_path_exists repo/folder2/a &&
++ test_path_exists repo/folder2/file &&
++
++ git -C repo sparse-checkout reapply 2>err &&
++ test_grep folder2 err &&
++ test_path_exists repo/folder2/a &&
++ test_path_exists repo/folder2/file &&
++
++ # Now, stage the change to the tracked file.
++ git -C repo add --sparse folder2/a &&
++
++ # Clean will continue not doing anything.
++ git -C repo sparse-checkout clean >out &&
++ test_line_count = 0 out &&
++ test_path_exists repo/folder2/a &&
++ test_path_exists repo/folder2/file &&
++
++ # But we can reapply to remove the staged change.
++ git -C repo sparse-checkout reapply 2>err &&
++ test_grep folder2 err &&
++ test_path_is_missing repo/folder2/a &&
++ test_path_exists repo/folder2/file &&
+
-+ # deletes deep/deeper2/ but leaves folder1/ and folder2/
++ # We can clean now.
+ cat >expect <<-\EOF &&
-+ Removing deep/deeper2/
++ Removing folder2/
+ EOF
-+
+ git -C repo sparse-checkout clean >out &&
+ test_cmp expect out &&
++ test_path_is_missing repo/folder2 &&
+
-+ test_path_is_missing repo/deep/deeper2 &&
-+ test_path_exists repo/folder1
++ # At the moment, the file is staged.
++ cat >expect <<-\EOF &&
++ M folder2/a
++ EOF
++
++ git -C repo status -s >out &&
++ test_cmp expect out &&
++
++ # Reapply persists the modified state.
++ git -C repo sparse-checkout reapply &&
++ cat >expect <<-\EOF &&
++ M folder2/a
++ EOF
++ git -C repo status -s >out &&
++ test_cmp expect out &&
++
++ # Committing the change leads to resolved status.
++ git -C repo commit -m "modified" &&
++ git -C repo status -s >out &&
++ test_must_be_empty out &&
++
++ # Repeat, but this time commit before reapplying.
++ mkdir repo/folder2/ &&
++ echo dirtier >repo/folder2/a &&
++ git -C repo add --sparse folder2/a &&
++ git -C repo sparse-checkout clean >out &&
++ test_must_be_empty out &&
++ test_path_exists repo/folder2/a &&
++
++ # Committing without reapplying makes it look like a deletion
++ # due to no skip-worktree bit.
++ git -C repo commit -m "dirtier" &&
++ git -C repo status -s >out &&
++ test_must_be_empty out &&
++
++ git -C repo sparse-checkout reapply &&
++ git -C repo status -s >out &&
++ test_must_be_empty out
+'
test_done
3: 221f3e5fb0 ! 3: 71a498db65 sparse-checkout: match some 'clean' behavior
@@ Commit message
Signed-off-by: Derrick Stolee <stolee@gmail.com>
## Documentation/git-sparse-checkout.adoc ##
-@@ Documentation/git-sparse-checkout.adoc: all sparsity paths.
- This command can be used to be sure the sparse index works
- efficiently, though it does not require enabling the sparse index
- feature via the `index.sparse=true` configuration.
+@@ Documentation/git-sparse-checkout.adoc: clean` to resolve these cases.
+ This command can be used to be sure the sparse index works efficiently,
+ though it does not require enabling the sparse index feature via the
+ `index.sparse=true` configuration.
++
+To prevent accidental deletion of worktree files, the `clean` subcommand
+will not delete any files without the `-f` or `--force` option, unless
@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean' '
test_cmp expect out &&
test_path_is_missing repo/deep/deeper2 &&
-@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean with staged sparse change' '
-
- git -C repo add --sparse folder1/file &&
-
-+ cat >expect <<-\EOF &&
-+ Would remove deep/deeper2/
-+ EOF
-+
-+ git -C repo sparse-checkout clean --dry-run >out &&
-+ test_cmp expect out &&
-+ test_path_exists repo/deep/deeper2 &&
-+ test_path_exists repo/folder1 &&
-+ test_path_exists repo/folder2 &&
-+
- # deletes deep/deeper2/ but leaves folder1/ and folder2/
- cat >expect <<-\EOF &&
- Removing deep/deeper2/
- EOF
+@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean with sparse file states' '
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
+ mkdir repo/folder2 &&
+ # The previous test case checked the -f option, so
+ # test the config option in this one.
+ git -C repo config clean.requireForce false &&
- git -C repo sparse-checkout clean >out &&
- test_cmp expect out &&
-
- test_path_is_missing repo/deep/deeper2 &&
-- test_path_exists repo/folder1
-+ test_path_exists repo/folder1 &&
-+ test_path_exists repo/folder2
-+'
+
+ # create an untracked file and a modified file
+ touch repo/folder2/file &&
+ echo dirty >repo/folder2/a &&
+@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean with sparse file states' '
+ test_must_be_empty out
+ '
+
+test_expect_success 'clean with merge conflict status' '
+ git clone repo clean-merge &&
+
@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean with staged spars
+
+ git -C clean-merge sparse-checkout clean -f >out &&
+ test_cmp expect out
- '
-
++'
++
test_done
4: fd9a20a392 = 4: 6b29dda4b8 dir: add generic "walk all files" helper
5: f464bb5ed6 ! 5: 2cde464fd4 sparse-checkout: add --verbose option to 'clean'
@@ Commit message
Add a new '--verbose' option for users to see all the files that are
being deleted (with '--force') or would be deleted (with '--dry-run').
+ Based on usage, users may request further context on this list of files for
+ states such as tracked/untracked, unstaged/staged/conflicted, etc.
+
Signed-off-by: Derrick Stolee <stolee@gmail.com>
## Documentation/git-sparse-checkout.adoc ##
@@ builtin/sparse-checkout.c: static int sparse_checkout_clean(int argc, const char
## t/t1091-sparse-checkout-builtin.sh ##
@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'check-rules null termination' '
-
test_expect_success 'clean' '
git -C repo sparse-checkout set --cone deep/deeper1 &&
+ git -C repo sparse-checkout reapply &&
- mkdir repo/deep/deeper2 repo/folder1 &&
+ mkdir -p repo/deep/deeper2 repo/folder1/extra/inside &&
+
+ # Add untracked files
touch repo/deep/deeper2/file &&
- touch repo/folder1/file &&
+ touch repo/folder1/extra/inside/file &&
6: d6dbc0b5ca = 6: 460e5e8157 sparse-index: point users to new 'clean' action
7: 0b1a2895b9 ! 7: 7f6f62bce6 t: expand tests around sparse merges and clean
@@ Commit message
Signed-off-by: Derrick Stolee <stolee@gmail.com>
## t/t1091-sparse-checkout-builtin.sh ##
-@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean with staged sparse change' '
- test_path_exists repo/folder2
+@@ t/t1091-sparse-checkout-builtin.sh: test_expect_success 'clean with sparse file states' '
+ test_must_be_empty out
'
-test_expect_success 'clean with merge conflict status' '
8: 82c24ce519 < -: ---------- sparse-checkout: make 'clean' clear more files
--
gitgitgadget
^ permalink raw reply [flat|nested] 69+ messages in thread* [PATCH v3 1/7] sparse-checkout: remove use of the_repository
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 2/7] sparse-checkout: add basics of 'clean' command Derrick Stolee via GitGitGadget
` (8 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The logic for the 'git sparse-checkout' builtin uses the_repository all
over the place, despite some use of a repository struct in different
method parameters. Complete this removal of the_repository by using
'repo' when possible.
In one place, there was already a local variable 'r' that was set to
the_repository, so move that to a method parameter.
We cannot remove the USE_THE_REPOSITORY_VARIABLE declaration as we are
still using global constants for the state of the sparse-checkout.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/sparse-checkout.c | 117 ++++++++++++++++++++------------------
1 file changed, 62 insertions(+), 55 deletions(-)
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 8c333b3e2e..06de61bd9d 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -204,12 +204,12 @@ static void clean_tracked_sparse_directories(struct repository *r)
ensure_full_index(r->index);
}
-static int update_working_directory(struct pattern_list *pl)
+static int update_working_directory(struct repository *r,
+ struct pattern_list *pl)
{
enum update_sparsity_result result;
struct unpack_trees_options o;
struct lock_file lock_file = LOCK_INIT;
- struct repository *r = the_repository;
struct pattern_list *old_pl;
/* If no branch has been checked out, there are no updates to make. */
@@ -327,7 +327,8 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl)
string_list_clear(&sl, 0);
}
-static int write_patterns_and_update(struct pattern_list *pl)
+static int write_patterns_and_update(struct repository *repo,
+ struct pattern_list *pl)
{
char *sparse_filename;
FILE *fp;
@@ -336,15 +337,15 @@ static int write_patterns_and_update(struct pattern_list *pl)
sparse_filename = get_sparse_checkout_filename();
- if (safe_create_leading_directories(the_repository, sparse_filename))
+ if (safe_create_leading_directories(repo, sparse_filename))
die(_("failed to create directory for sparse-checkout file"));
hold_lock_file_for_update(&lk, sparse_filename, LOCK_DIE_ON_ERROR);
- result = update_working_directory(pl);
+ result = update_working_directory(repo, pl);
if (result) {
rollback_lock_file(&lk);
- update_working_directory(NULL);
+ update_working_directory(repo, NULL);
goto out;
}
@@ -372,25 +373,26 @@ enum sparse_checkout_mode {
MODE_CONE_PATTERNS = 2,
};
-static int set_config(enum sparse_checkout_mode mode)
+static int set_config(struct repository *repo,
+ enum sparse_checkout_mode mode)
{
/* Update to use worktree config, if not already. */
- if (init_worktree_config(the_repository)) {
+ if (init_worktree_config(repo)) {
error(_("failed to initialize worktree config"));
return 1;
}
- if (repo_config_set_worktree_gently(the_repository,
+ if (repo_config_set_worktree_gently(repo,
"core.sparseCheckout",
mode ? "true" : "false") ||
- repo_config_set_worktree_gently(the_repository,
+ repo_config_set_worktree_gently(repo,
"core.sparseCheckoutCone",
mode == MODE_CONE_PATTERNS ?
"true" : "false"))
return 1;
if (mode == MODE_NO_PATTERNS)
- return set_sparse_index_config(the_repository, 0);
+ return set_sparse_index_config(repo, 0);
return 0;
}
@@ -410,7 +412,7 @@ static enum sparse_checkout_mode update_cone_mode(int *cone_mode) {
return MODE_ALL_PATTERNS;
}
-static int update_modes(int *cone_mode, int *sparse_index)
+static int update_modes(struct repository *repo, int *cone_mode, int *sparse_index)
{
int mode, record_mode;
@@ -418,20 +420,20 @@ static int update_modes(int *cone_mode, int *sparse_index)
record_mode = (*cone_mode != -1) || !core_apply_sparse_checkout;
mode = update_cone_mode(cone_mode);
- if (record_mode && set_config(mode))
+ if (record_mode && set_config(repo, mode))
return 1;
/* Set sparse-index/non-sparse-index mode if specified */
if (*sparse_index >= 0) {
- if (set_sparse_index_config(the_repository, *sparse_index) < 0)
+ if (set_sparse_index_config(repo, *sparse_index) < 0)
die(_("failed to modify sparse-index config"));
/* force an index rewrite */
- repo_read_index(the_repository);
- the_repository->index->updated_workdir = 1;
+ repo_read_index(repo);
+ repo->index->updated_workdir = 1;
if (!*sparse_index)
- ensure_full_index(the_repository->index);
+ ensure_full_index(repo->index);
}
return 0;
@@ -448,7 +450,7 @@ static struct sparse_checkout_init_opts {
} init_opts;
static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
struct pattern_list pl;
char *sparse_filename;
@@ -464,7 +466,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
};
setup_work_tree();
- repo_read_index(the_repository);
+ repo_read_index(repo);
init_opts.cone_mode = -1;
init_opts.sparse_index = -1;
@@ -473,7 +475,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
builtin_sparse_checkout_init_options,
builtin_sparse_checkout_init_usage, 0);
- if (update_modes(&init_opts.cone_mode, &init_opts.sparse_index))
+ if (update_modes(repo, &init_opts.cone_mode, &init_opts.sparse_index))
return 1;
memset(&pl, 0, sizeof(pl));
@@ -485,14 +487,14 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
if (res >= 0) {
free(sparse_filename);
clear_pattern_list(&pl);
- return update_working_directory(NULL);
+ return update_working_directory(repo, NULL);
}
- if (repo_get_oid(the_repository, "HEAD", &oid)) {
+ if (repo_get_oid(repo, "HEAD", &oid)) {
FILE *fp;
/* assume we are in a fresh repo, but update the sparse-checkout file */
- if (safe_create_leading_directories(the_repository, sparse_filename))
+ if (safe_create_leading_directories(repo, sparse_filename))
die(_("unable to create leading directories of %s"),
sparse_filename);
fp = xfopen(sparse_filename, "w");
@@ -511,7 +513,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix,
add_pattern("!/*/", empty_base, 0, &pl, 0);
pl.use_cone_patterns = init_opts.cone_mode;
- return write_patterns_and_update(&pl);
+ return write_patterns_and_update(repo, &pl);
}
static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *path)
@@ -674,7 +676,8 @@ static void add_patterns_literal(int argc, const char **argv,
add_patterns_from_input(pl, argc, argv, use_stdin ? stdin : NULL);
}
-static int modify_pattern_list(struct strvec *args, int use_stdin,
+static int modify_pattern_list(struct repository *repo,
+ struct strvec *args, int use_stdin,
enum modify_type m)
{
int result;
@@ -696,22 +699,23 @@ static int modify_pattern_list(struct strvec *args, int use_stdin,
}
if (!core_apply_sparse_checkout) {
- set_config(MODE_ALL_PATTERNS);
+ set_config(repo, MODE_ALL_PATTERNS);
core_apply_sparse_checkout = 1;
changed_config = 1;
}
- result = write_patterns_and_update(pl);
+ result = write_patterns_and_update(repo, pl);
if (result && changed_config)
- set_config(MODE_NO_PATTERNS);
+ set_config(repo, MODE_NO_PATTERNS);
clear_pattern_list(pl);
free(pl);
return result;
}
-static void sanitize_paths(struct strvec *args,
+static void sanitize_paths(struct repository *repo,
+ struct strvec *args,
const char *prefix, int skip_checks)
{
int i;
@@ -752,7 +756,7 @@ static void sanitize_paths(struct strvec *args,
for (i = 0; i < args->nr; i++) {
struct cache_entry *ce;
- struct index_state *index = the_repository->index;
+ struct index_state *index = repo->index;
int pos = index_name_pos(index, args->v[i], strlen(args->v[i]));
if (pos < 0)
@@ -779,7 +783,7 @@ static struct sparse_checkout_add_opts {
} add_opts;
static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_add_options[] = {
OPT_BOOL_F(0, "skip-checks", &add_opts.skip_checks,
@@ -796,7 +800,7 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
if (!core_apply_sparse_checkout)
die(_("no sparse-checkout to add to"));
- repo_read_index(the_repository);
+ repo_read_index(repo);
argc = parse_options(argc, argv, prefix,
builtin_sparse_checkout_add_options,
@@ -804,9 +808,9 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix,
for (int i = 0; i < argc; i++)
strvec_push(&patterns, argv[i]);
- sanitize_paths(&patterns, prefix, add_opts.skip_checks);
+ sanitize_paths(repo, &patterns, prefix, add_opts.skip_checks);
- ret = modify_pattern_list(&patterns, add_opts.use_stdin, ADD);
+ ret = modify_pattern_list(repo, &patterns, add_opts.use_stdin, ADD);
strvec_clear(&patterns);
return ret;
@@ -825,7 +829,7 @@ static struct sparse_checkout_set_opts {
} set_opts;
static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
int default_patterns_nr = 2;
const char *default_patterns[] = {"/*", "!/*/", NULL};
@@ -847,7 +851,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
int ret;
setup_work_tree();
- repo_read_index(the_repository);
+ repo_read_index(repo);
set_opts.cone_mode = -1;
set_opts.sparse_index = -1;
@@ -856,7 +860,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
builtin_sparse_checkout_set_options,
builtin_sparse_checkout_set_usage, 0);
- if (update_modes(&set_opts.cone_mode, &set_opts.sparse_index))
+ if (update_modes(repo, &set_opts.cone_mode, &set_opts.sparse_index))
return 1;
/*
@@ -870,10 +874,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix,
} else {
for (int i = 0; i < argc; i++)
strvec_push(&patterns, argv[i]);
- sanitize_paths(&patterns, prefix, set_opts.skip_checks);
+ sanitize_paths(repo, &patterns, prefix, set_opts.skip_checks);
}
- ret = modify_pattern_list(&patterns, set_opts.use_stdin, REPLACE);
+ ret = modify_pattern_list(repo, &patterns, set_opts.use_stdin, REPLACE);
strvec_clear(&patterns);
return ret;
@@ -891,7 +895,7 @@ static struct sparse_checkout_reapply_opts {
static int sparse_checkout_reapply(int argc, const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_reapply_options[] = {
OPT_BOOL(0, "cone", &reapply_opts.cone_mode,
@@ -912,12 +916,12 @@ static int sparse_checkout_reapply(int argc, const char **argv,
builtin_sparse_checkout_reapply_options,
builtin_sparse_checkout_reapply_usage, 0);
- repo_read_index(the_repository);
+ repo_read_index(repo);
- if (update_modes(&reapply_opts.cone_mode, &reapply_opts.sparse_index))
+ if (update_modes(repo, &reapply_opts.cone_mode, &reapply_opts.sparse_index))
return 1;
- return update_working_directory(NULL);
+ return update_working_directory(repo, NULL);
}
static char const * const builtin_sparse_checkout_disable_usage[] = {
@@ -927,7 +931,7 @@ static char const * const builtin_sparse_checkout_disable_usage[] = {
static int sparse_checkout_disable(int argc, const char **argv,
const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_disable_options[] = {
OPT_END(),
@@ -955,7 +959,7 @@ static int sparse_checkout_disable(int argc, const char **argv,
* are expecting to do that when disabling sparse-checkout.
*/
give_advice_on_expansion = 0;
- repo_read_index(the_repository);
+ repo_read_index(repo);
memset(&pl, 0, sizeof(pl));
hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0);
@@ -966,13 +970,13 @@ static int sparse_checkout_disable(int argc, const char **argv,
add_pattern("/*", empty_base, 0, &pl, 0);
prepare_repo_settings(the_repository);
- the_repository->settings.sparse_index = 0;
+ repo->settings.sparse_index = 0;
- if (update_working_directory(&pl))
+ if (update_working_directory(repo, &pl))
die(_("error while refreshing working directory"));
clear_pattern_list(&pl);
- return set_config(MODE_NO_PATTERNS);
+ return set_config(repo, MODE_NO_PATTERNS);
}
static char const * const builtin_sparse_checkout_check_rules_usage[] = {
@@ -987,14 +991,17 @@ static struct sparse_checkout_check_rules_opts {
char *rules_file;
} check_rules_opts;
-static int check_rules(struct pattern_list *pl, int null_terminated) {
+static int check_rules(struct repository *repo,
+ struct pattern_list *pl,
+ int null_terminated)
+{
struct strbuf line = STRBUF_INIT;
struct strbuf unquoted = STRBUF_INIT;
char *path;
int line_terminator = null_terminated ? 0 : '\n';
strbuf_getline_fn getline_fn = null_terminated ? strbuf_getline_nul
: strbuf_getline;
- the_repository->index->sparse_checkout_patterns = pl;
+ repo->index->sparse_checkout_patterns = pl;
while (!getline_fn(&line, stdin)) {
path = line.buf;
if (!null_terminated && line.buf[0] == '"') {
@@ -1006,7 +1013,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) {
path = unquoted.buf;
}
- if (path_in_sparse_checkout(path, the_repository->index))
+ if (path_in_sparse_checkout(path, repo->index))
write_name_quoted(path, stdout, line_terminator);
}
strbuf_release(&line);
@@ -1016,7 +1023,7 @@ static int check_rules(struct pattern_list *pl, int null_terminated) {
}
static int sparse_checkout_check_rules(int argc, const char **argv, const char *prefix,
- struct repository *repo UNUSED)
+ struct repository *repo)
{
static struct option builtin_sparse_checkout_check_rules_options[] = {
OPT_BOOL('z', NULL, &check_rules_opts.null_termination,
@@ -1055,7 +1062,7 @@ static int sparse_checkout_check_rules(int argc, const char **argv, const char *
free(sparse_filename);
}
- ret = check_rules(&pl, check_rules_opts.null_termination);
+ ret = check_rules(repo, &pl, check_rules_opts.null_termination);
clear_pattern_list(&pl);
free(check_rules_opts.rules_file);
return ret;
@@ -1084,8 +1091,8 @@ int cmd_sparse_checkout(int argc,
repo_config(the_repository, git_default_config, NULL);
- prepare_repo_settings(the_repository);
- the_repository->settings.command_requires_full_index = 0;
+ prepare_repo_settings(repo);
+ repo->settings.command_requires_full_index = 0;
return fn(argc, argv, prefix, repo);
}
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* [PATCH v3 2/7] sparse-checkout: add basics of 'clean' command
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 1/7] sparse-checkout: remove use of the_repository Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-10-07 22:49 ` Elijah Newren
2025-09-12 10:30 ` [PATCH v3 3/7] sparse-checkout: match some 'clean' behavior Derrick Stolee via GitGitGadget
` (7 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
When users change their sparse-checkout definitions to add new
directories and remove old ones, there may be a few reasons why
directories no longer in scope remain (ignored or excluded files still
exist, Windows handles are still open, etc.). When these files still
exist, the sparse index feature notices that a tracked, but sparse,
directory still exists on disk and thus the index expands. This causes a
performance hit _and_ the advice printed isn't very helpful. Using 'git
clean' isn't enough (generally '-dfx' may be needed) but also this may
not be sufficient.
Add a new subcommand to 'git sparse-checkout' that removes these
tracked-but-sparse directories.
The implementation details provide a clear definition of what is happening,
but it is difficult to describe this without including the internal
implementation details. The core operation converts the index to a sparse
index (in memory if not already on disk) and then deletes any directories in
the worktree that correspond with a sparse directory entry in that sparse
index.
In the most common case, this means that a file will be removed if it is
contained within a directory that is both tracked and outside of the
sparse-checkout definition. However, there can be exceptions depending on
the current state of the index:
* If the worktree has a modification to a tracked, sparse file, then that
file's parent directories will be expanded instead of represented as
sparse directories. Siblings of those parent directories may be
considered sparse.
* If the user staged a sparse file with "git add --sparse", then that file
loses the SKIP_WORKTREE bit until the sparse-checkout is reapplied. Until
then, that file's parent directories are not represented as sparse
directory entries and thus will not be removed. Siblings of those parent
directories may be considered sparse. (There may be other reasons why
the SKIP_WORKTREE bit was removed for a file and this impact on the
sparse directories will apply to those as well.)
* If the user has a merge conflict outside of the sparse-checkout
definition, then those conflict entries prevent the parent directories
from being represented as sparse directory entries and thus are not
removed.
* The cases above present reasons why certain _file conditions_ will impact
which _directories_ are considered sparse. The list of tracked
directories that are outside of the sparse-checkout definition but not
represented as a sparse directory further reduces the list of files that
will be removed.
For these complicated reasons, the documentation details a potential list of
files that will be "considered for removal" instead of defining the list
concretely. The special cases can be handled by resolving conflicts,
committing staged changes, and running 'git sparse-checkout reapply' to
update the SKIP_WORKTREE bits as expected by the sparse-checkout definition.
It is important to make clear that this operation will remove ignored and
excluded files which would normally be ignored even by 'git clean -f' unless
the '-x' or '-X' option is provided. This is the most extreme method for
doing this, but it works when the sparse-checkout is in cone mode and is
expected to rescope based on directories, not files.
The current implementation always deletes these sparse directories
without warning. This is unacceptable for a released version, but those
features will be added in changes coming immediately after this one.
Note that this will not remove an untracked directory (or any of its
contents) if its parent is a tracked directory within the sparse-checkout
definition. This is required to prevent removing data created by tools that
perform caching operations for editors or build tools.
Thus, 'git sparse-checkout clean' is both more aggressive and more careful
than 'git clean -fx':
* It is more aggressive because it will remove _tracked_ files within the
sparse directories.
* It is less aggressive because it will leave _untracked_ files that are
not contained in sparse directories.
These special cases will be handled more explicitly in a future change that
expands tests for the 'git sparse-checkout clean' command. We handle some of
the modified, staged, and committed states including some impact on 'git
status' after cleaning.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/git-sparse-checkout.adoc | 19 ++++-
builtin/sparse-checkout.c | 64 ++++++++++++++-
t/t1091-sparse-checkout-builtin.sh | 103 +++++++++++++++++++++++++
3 files changed, 184 insertions(+), 2 deletions(-)
diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
index 529a8edd9c..baaebce746 100644
--- a/Documentation/git-sparse-checkout.adoc
+++ b/Documentation/git-sparse-checkout.adoc
@@ -9,7 +9,7 @@ git-sparse-checkout - Reduce your working tree to a subset of tracked files
SYNOPSIS
--------
[verse]
-'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules) [<options>]
+'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules | clean) [<options>]
DESCRIPTION
@@ -111,6 +111,23 @@ flags, with the same meaning as the flags from the `set` command, in order
to change which sparsity mode you are using without needing to also respecify
all sparsity paths.
+'clean'::
+ Opportunistically remove files outside of the sparse-checkout
+ definition. This command requires cone mode to use recursive
+ directory matches to determine which files should be removed. A
+ file is considered for removal if it is contained within a tracked
+ directory that is outside of the sparse-checkout definition.
++
+Some special cases, such as merge conflicts or modified files outside of
+the sparse-checkout definition could lead to keeping files that would
+otherwise be removed. Resolve conflicts, stage modifications, and use
+`git sparse-checkout reapply` in conjunction with `git sparse-checkout
+clean` to resolve these cases.
++
+This command can be used to be sure the sparse index works efficiently,
+though it does not require enabling the sparse index feature via the
+`index.sparse=true` configuration.
+
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
working directory to include all files.
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 06de61bd9d..f7caa28f3f 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -2,6 +2,7 @@
#define DISABLE_SIGN_COMPARE_WARNINGS
#include "builtin.h"
+#include "abspath.h"
#include "config.h"
#include "dir.h"
#include "environment.h"
@@ -23,7 +24,7 @@
static const char *empty_base = "";
static char const * const builtin_sparse_checkout_usage[] = {
- N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules) [<options>]"),
+ N_("git sparse-checkout (init | list | set | add | reapply | disable | check-rules | clean) [<options>]"),
NULL
};
@@ -924,6 +925,66 @@ static int sparse_checkout_reapply(int argc, const char **argv,
return update_working_directory(repo, NULL);
}
+static char const * const builtin_sparse_checkout_clean_usage[] = {
+ "git sparse-checkout clean [-n|--dry-run]",
+ NULL
+};
+
+static const char *msg_remove = N_("Removing %s\n");
+
+static int sparse_checkout_clean(int argc, const char **argv,
+ const char *prefix,
+ struct repository *repo)
+{
+ struct strbuf full_path = STRBUF_INIT;
+ const char *msg = msg_remove;
+ size_t worktree_len;
+
+ struct option builtin_sparse_checkout_clean_options[] = {
+ OPT_END(),
+ };
+
+ setup_work_tree();
+ if (!core_apply_sparse_checkout)
+ die(_("must be in a sparse-checkout to clean directories"));
+ if (!core_sparse_checkout_cone)
+ die(_("must be in a cone-mode sparse-checkout to clean directories"));
+
+ argc = parse_options(argc, argv, prefix,
+ builtin_sparse_checkout_clean_options,
+ builtin_sparse_checkout_clean_usage, 0);
+
+ if (repo_read_index(repo) < 0)
+ die(_("failed to read index"));
+
+ if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
+ repo->index->sparse_index == INDEX_EXPANDED)
+ die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
+
+ strbuf_addstr(&full_path, repo->worktree);
+ strbuf_addch(&full_path, '/');
+ worktree_len = full_path.len;
+
+ for (size_t i = 0; i < repo->index->cache_nr; i++) {
+ struct cache_entry *ce = repo->index->cache[i];
+ if (!S_ISSPARSEDIR(ce->ce_mode))
+ continue;
+ strbuf_setlen(&full_path, worktree_len);
+ strbuf_add(&full_path, ce->name, ce->ce_namelen);
+
+ if (!is_directory(full_path.buf))
+ continue;
+
+ printf(msg, ce->name);
+
+ if (remove_dir_recursively(&full_path, 0))
+ warning_errno(_("failed to remove '%s'"), ce->name);
+ }
+
+ strbuf_release(&full_path);
+ return 0;
+}
+
static char const * const builtin_sparse_checkout_disable_usage[] = {
"git sparse-checkout disable",
NULL
@@ -1080,6 +1141,7 @@ int cmd_sparse_checkout(int argc,
OPT_SUBCOMMAND("set", &fn, sparse_checkout_set),
OPT_SUBCOMMAND("add", &fn, sparse_checkout_add),
OPT_SUBCOMMAND("reapply", &fn, sparse_checkout_reapply),
+ OPT_SUBCOMMAND("clean", &fn, sparse_checkout_clean),
OPT_SUBCOMMAND("disable", &fn, sparse_checkout_disable),
OPT_SUBCOMMAND("check-rules", &fn, sparse_checkout_check_rules),
OPT_END(),
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index ab3a105fff..bdb7b21e32 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1050,5 +1050,108 @@ test_expect_success 'check-rules null termination' '
test_cmp expect actual
'
+test_expect_success 'clean' '
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
+ git -C repo sparse-checkout reapply &&
+ mkdir repo/deep/deeper2 repo/folder1 &&
+
+ # Add untracked files
+ touch repo/deep/deeper2/file &&
+ touch repo/folder1/file &&
+
+ cat >expect <<-\EOF &&
+ Removing deep/deeper2/
+ Removing folder1/
+ EOF
+
+ git -C repo sparse-checkout clean >out &&
+ test_cmp expect out &&
+
+ test_path_is_missing repo/deep/deeper2 &&
+ test_path_is_missing repo/folder1
+'
+
+test_expect_success 'clean with sparse file states' '
+ test_when_finished git reset --hard &&
+ git -C repo sparse-checkout set --cone deep/deeper1 &&
+ mkdir repo/folder2 &&
+
+ # create an untracked file and a modified file
+ touch repo/folder2/file &&
+ echo dirty >repo/folder2/a &&
+
+ # First clean/reapply pass will do nothing.
+ git -C repo sparse-checkout clean >out &&
+ test_must_be_empty out &&
+ test_path_exists repo/folder2/a &&
+ test_path_exists repo/folder2/file &&
+
+ git -C repo sparse-checkout reapply 2>err &&
+ test_grep folder2 err &&
+ test_path_exists repo/folder2/a &&
+ test_path_exists repo/folder2/file &&
+
+ # Now, stage the change to the tracked file.
+ git -C repo add --sparse folder2/a &&
+
+ # Clean will continue not doing anything.
+ git -C repo sparse-checkout clean >out &&
+ test_line_count = 0 out &&
+ test_path_exists repo/folder2/a &&
+ test_path_exists repo/folder2/file &&
+
+ # But we can reapply to remove the staged change.
+ git -C repo sparse-checkout reapply 2>err &&
+ test_grep folder2 err &&
+ test_path_is_missing repo/folder2/a &&
+ test_path_exists repo/folder2/file &&
+
+ # We can clean now.
+ cat >expect <<-\EOF &&
+ Removing folder2/
+ EOF
+ git -C repo sparse-checkout clean >out &&
+ test_cmp expect out &&
+ test_path_is_missing repo/folder2 &&
+
+ # At the moment, the file is staged.
+ cat >expect <<-\EOF &&
+ M folder2/a
+ EOF
+
+ git -C repo status -s >out &&
+ test_cmp expect out &&
+
+ # Reapply persists the modified state.
+ git -C repo sparse-checkout reapply &&
+ cat >expect <<-\EOF &&
+ M folder2/a
+ EOF
+ git -C repo status -s >out &&
+ test_cmp expect out &&
+
+ # Committing the change leads to resolved status.
+ git -C repo commit -m "modified" &&
+ git -C repo status -s >out &&
+ test_must_be_empty out &&
+
+ # Repeat, but this time commit before reapplying.
+ mkdir repo/folder2/ &&
+ echo dirtier >repo/folder2/a &&
+ git -C repo add --sparse folder2/a &&
+ git -C repo sparse-checkout clean >out &&
+ test_must_be_empty out &&
+ test_path_exists repo/folder2/a &&
+
+ # Committing without reapplying makes it look like a deletion
+ # due to no skip-worktree bit.
+ git -C repo commit -m "dirtier" &&
+ git -C repo status -s >out &&
+ test_must_be_empty out &&
+
+ git -C repo sparse-checkout reapply &&
+ git -C repo status -s >out &&
+ test_must_be_empty out
+'
test_done
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v3 2/7] sparse-checkout: add basics of 'clean' command
2025-09-12 10:30 ` [PATCH v3 2/7] sparse-checkout: add basics of 'clean' command Derrick Stolee via GitGitGadget
@ 2025-10-07 22:49 ` Elijah Newren
2025-10-20 14:16 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-10-07 22:49 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Fri, Sep 12, 2025 at 3:30 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> When users change their sparse-checkout definitions to add new
> directories and remove old ones, there may be a few reasons why
> directories no longer in scope remain (ignored or excluded files still
> exist, Windows handles are still open, etc.). When these files still
> exist, the sparse index feature notices that a tracked, but sparse,
> directory still exists on disk and thus the index expands. This causes a
> performance hit _and_ the advice printed isn't very helpful. Using 'git
> clean' isn't enough (generally '-dfx' may be needed) but also this may
> not be sufficient.
>
> Add a new subcommand to 'git sparse-checkout' that removes these
> tracked-but-sparse directories.
>
> The implementation details provide a clear definition of what is happening,
> but it is difficult to describe this without including the internal
> implementation details. The core operation converts the index to a sparse
> index (in memory if not already on disk) and then deletes any directories in
> the worktree that correspond with a sparse directory entry in that sparse
> index.
>
> In the most common case, this means that a file will be removed if it is
> contained within a directory that is both tracked and outside of the
> sparse-checkout definition. However, there can be exceptions depending on
> the current state of the index:
>
> * If the worktree has a modification to a tracked, sparse file, then that
> file's parent directories will be expanded instead of represented as
> sparse directories. Siblings of those parent directories may be
> considered sparse.
>
> * If the user staged a sparse file with "git add --sparse", then that file
> loses the SKIP_WORKTREE bit until the sparse-checkout is reapplied. Until
> then, that file's parent directories are not represented as sparse
> directory entries and thus will not be removed. Siblings of those parent
> directories may be considered sparse. (There may be other reasons why
> the SKIP_WORKTREE bit was removed for a file and this impact on the
> sparse directories will apply to those as well.)
>
> * If the user has a merge conflict outside of the sparse-checkout
> definition, then those conflict entries prevent the parent directories
> from being represented as sparse directory entries and thus are not
> removed.
>
> * The cases above present reasons why certain _file conditions_ will impact
> which _directories_ are considered sparse. The list of tracked
> directories that are outside of the sparse-checkout definition but not
> represented as a sparse directory further reduces the list of files that
> will be removed.
>
> For these complicated reasons, the documentation details a potential list of
> files that will be "considered for removal" instead of defining the list
> concretely. The special cases can be handled by resolving conflicts,
> committing staged changes, and running 'git sparse-checkout reapply' to
> update the SKIP_WORKTREE bits as expected by the sparse-checkout definition.
>
> It is important to make clear that this operation will remove ignored and
> excluded files which would normally be ignored even by 'git clean -f' unless
> the '-x' or '-X' option is provided. This is the most extreme method for
> doing this, but it works when the sparse-checkout is in cone mode and is
> expected to rescope based on directories, not files.
>
> The current implementation always deletes these sparse directories
> without warning. This is unacceptable for a released version, but those
> features will be added in changes coming immediately after this one.
>
> Note that this will not remove an untracked directory (or any of its
> contents) if its parent is a tracked directory within the sparse-checkout
> definition. This is required to prevent removing data created by tools that
> perform caching operations for editors or build tools.
>
> Thus, 'git sparse-checkout clean' is both more aggressive and more careful
> than 'git clean -fx':
>
> * It is more aggressive because it will remove _tracked_ files within the
> sparse directories.
>
> * It is less aggressive because it will leave _untracked_ files that are
> not contained in sparse directories.
>
> These special cases will be handled more explicitly in a future change that
> expands tests for the 'git sparse-checkout clean' command. We handle some of
> the modified, staged, and committed states including some impact on 'git
> status' after cleaning.
I appreciate the more detailed explanation.
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> Documentation/git-sparse-checkout.adoc | 19 ++++-
> builtin/sparse-checkout.c | 64 ++++++++++++++-
> t/t1091-sparse-checkout-builtin.sh | 103 +++++++++++++++++++++++++
> 3 files changed, 184 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
> index 529a8edd9c..baaebce746 100644
> --- a/Documentation/git-sparse-checkout.adoc
> +++ b/Documentation/git-sparse-checkout.adoc
> @@ -9,7 +9,7 @@ git-sparse-checkout - Reduce your working tree to a subset of tracked files
> SYNOPSIS
> --------
> [verse]
> -'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules) [<options>]
> +'git sparse-checkout' (init | list | set | add | reapply | disable | check-rules | clean) [<options>]
>
>
> DESCRIPTION
> @@ -111,6 +111,23 @@ flags, with the same meaning as the flags from the `set` command, in order
> to change which sparsity mode you are using without needing to also respecify
> all sparsity paths.
>
> +'clean'::
> + Opportunistically remove files outside of the sparse-checkout
> + definition. This command requires cone mode to use recursive
> + directory matches to determine which files should be removed. A
> + file is considered for removal if it is contained within a tracked
> + directory that is outside of the sparse-checkout definition.
> ++
> +Some special cases, such as merge conflicts or modified files outside of
> +the sparse-checkout definition could lead to keeping files that would
> +otherwise be removed. Resolve conflicts, stage modifications, and use
> +`git sparse-checkout reapply` in conjunction with `git sparse-checkout
> +clean` to resolve these cases.
> ++
> +This command can be used to be sure the sparse index works efficiently,
> +though it does not require enabling the sparse index feature via the
> +`index.sparse=true` configuration.
This expanded explanation for users is nice too. I particularly like
that you called out three things users need to use in conjunction with
this command -- resolving conflicts, staging modifications, and using
`git sparse-checkout reapply`...
[...]
> + if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
> + repo->index->sparse_index == INDEX_EXPANDED)
> + die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
...yet the error message you give to users only lists one of those
three things even though the other two may be the problem. Could we
fix up the error message?
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index ab3a105fff..bdb7b21e32 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -1050,5 +1050,108 @@ test_expect_success 'check-rules null termination' '
> test_cmp expect actual
> '
>
> +test_expect_success 'clean' '
> + git -C repo sparse-checkout set --cone deep/deeper1 &&
> + git -C repo sparse-checkout reapply &&
> + mkdir repo/deep/deeper2 repo/folder1 &&
> +
> + # Add untracked files
> + touch repo/deep/deeper2/file &&
> + touch repo/folder1/file &&
> +
> + cat >expect <<-\EOF &&
> + Removing deep/deeper2/
> + Removing folder1/
> + EOF
> +
> + git -C repo sparse-checkout clean >out &&
> + test_cmp expect out &&
> +
> + test_path_is_missing repo/deep/deeper2 &&
> + test_path_is_missing repo/folder1
> +'
> +
> +test_expect_success 'clean with sparse file states' '
> + test_when_finished git reset --hard &&
> + git -C repo sparse-checkout set --cone deep/deeper1 &&
> + mkdir repo/folder2 &&
> +
> + # create an untracked file and a modified file
> + touch repo/folder2/file &&
> + echo dirty >repo/folder2/a &&
> +
> + # First clean/reapply pass will do nothing.
> + git -C repo sparse-checkout clean >out &&
> + test_must_be_empty out &&
> + test_path_exists repo/folder2/a &&
> + test_path_exists repo/folder2/file &&
> +
> + git -C repo sparse-checkout reapply 2>err &&
> + test_grep folder2 err &&
> + test_path_exists repo/folder2/a &&
> + test_path_exists repo/folder2/file &&
> +
> + # Now, stage the change to the tracked file.
> + git -C repo add --sparse folder2/a &&
> +
> + # Clean will continue not doing anything.
> + git -C repo sparse-checkout clean >out &&
> + test_line_count = 0 out &&
> + test_path_exists repo/folder2/a &&
> + test_path_exists repo/folder2/file &&
> +
> + # But we can reapply to remove the staged change.
> + git -C repo sparse-checkout reapply 2>err &&
> + test_grep folder2 err &&
> + test_path_is_missing repo/folder2/a &&
> + test_path_exists repo/folder2/file &&
> +
> + # We can clean now.
> + cat >expect <<-\EOF &&
> + Removing folder2/
> + EOF
> + git -C repo sparse-checkout clean >out &&
> + test_cmp expect out &&
> + test_path_is_missing repo/folder2 &&
> +
> + # At the moment, the file is staged.
> + cat >expect <<-\EOF &&
> + M folder2/a
> + EOF
> +
> + git -C repo status -s >out &&
> + test_cmp expect out &&
> +
> + # Reapply persists the modified state.
> + git -C repo sparse-checkout reapply &&
> + cat >expect <<-\EOF &&
> + M folder2/a
> + EOF
> + git -C repo status -s >out &&
> + test_cmp expect out &&
> +
> + # Committing the change leads to resolved status.
> + git -C repo commit -m "modified" &&
> + git -C repo status -s >out &&
> + test_must_be_empty out &&
> +
> + # Repeat, but this time commit before reapplying.
> + mkdir repo/folder2/ &&
> + echo dirtier >repo/folder2/a &&
> + git -C repo add --sparse folder2/a &&
> + git -C repo sparse-checkout clean >out &&
> + test_must_be_empty out &&
> + test_path_exists repo/folder2/a &&
> +
> + # Committing without reapplying makes it look like a deletion
> + # due to no skip-worktree bit.
> + git -C repo commit -m "dirtier" &&
> + git -C repo status -s >out &&
> + test_must_be_empty out &&
> +
> + git -C repo sparse-checkout reapply &&
> + git -C repo status -s >out &&
> + test_must_be_empty out
> +'
>
> test_done
> --
> gitgitgadget
I very much appreciate the extended test.
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v3 2/7] sparse-checkout: add basics of 'clean' command
2025-10-07 22:49 ` Elijah Newren
@ 2025-10-20 14:16 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-10-20 14:16 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 10/7/25 6:49 PM, Elijah Newren wrote:
> On Fri, Sep 12, 2025 at 3:30 AM Derrick Stolee via GitGitGadget
>> +Some special cases, such as merge conflicts or modified files outside of
>> +the sparse-checkout definition could lead to keeping files that would
>> +otherwise be removed. Resolve conflicts, stage modifications, and use
>> +`git sparse-checkout reapply` in conjunction with `git sparse-checkout
>> +clean` to resolve these cases.
>> ++
>> +This command can be used to be sure the sparse index works efficiently,
>> +though it does not require enabling the sparse index feature via the
>> +`index.sparse=true` configuration.
>
> This expanded explanation for users is nice too. I particularly like
> that you called out three things users need to use in conjunction with
> this command -- resolving conflicts, staging modifications, and using
> `git sparse-checkout reapply`...
These cases are about 'git sparse-checkout clean' itself...
> [...]
>> + if (convert_to_sparse(repo->index, SPARSE_INDEX_MEMORY_ONLY) ||
>> + repo->index->sparse_index == INDEX_EXPANDED)
>> + die(_("failed to convert index to a sparse index; resolve merge conflicts and try again"));
>
> ...yet the error message you give to users only lists one of those
> three things even though the other two may be the problem. Could we
> fix up the error message?
But this is specifically about the convert_to_sparse() action, which
is very limited in what could cause it to fail.
I _will_ update the advice later (added in Patch 6) to include the
more detailed set of actions.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v3 3/7] sparse-checkout: match some 'clean' behavior
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 1/7] sparse-checkout: remove use of the_repository Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 2/7] sparse-checkout: add basics of 'clean' command Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 4/7] dir: add generic "walk all files" helper Derrick Stolee via GitGitGadget
` (6 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The 'git sparse-checkout clean' subcommand is somewhat similar to 'git
clean' in that it will delete files that should not be in the worktree.
The big difference is that it focuses on the directories that should not
be in the worktree due to cone-mode sparse-checkout. It also does not
discriminate in the kinds of files and focuses on deleting entire
directories.
However, there are some restrictions that would be good to bring over
from 'git clean', specifically how it refuses to do anything without the
'-f'/'--force' or '-n'/'--dry-run' arguments. The 'clean.requireForce'
config can be set to 'false' to imply '--force'.
Add this behavior to avoid accidental deletion of files that cannot be
recovered from Git.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/git-sparse-checkout.adoc | 9 +++++
builtin/sparse-checkout.c | 15 ++++++-
t/t1091-sparse-checkout-builtin.sh | 54 +++++++++++++++++++++++++-
3 files changed, 76 insertions(+), 2 deletions(-)
diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
index baaebce746..42050ff5b5 100644
--- a/Documentation/git-sparse-checkout.adoc
+++ b/Documentation/git-sparse-checkout.adoc
@@ -127,6 +127,15 @@ clean` to resolve these cases.
This command can be used to be sure the sparse index works efficiently,
though it does not require enabling the sparse index feature via the
`index.sparse=true` configuration.
++
+To prevent accidental deletion of worktree files, the `clean` subcommand
+will not delete any files without the `-f` or `--force` option, unless
+the `clean.requireForce` config option is set to `false`.
++
+The `--dry-run` option will list the directories that would be removed
+without deleting them. Running in this mode can be helpful to predict the
+behavior of the clean comand or to determine which kinds of files are left
+in the sparse directories.
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index f7caa28f3f..d777b64960 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -931,6 +931,7 @@ static char const * const builtin_sparse_checkout_clean_usage[] = {
};
static const char *msg_remove = N_("Removing %s\n");
+static const char *msg_would_remove = N_("Would remove %s\n");
static int sparse_checkout_clean(int argc, const char **argv,
const char *prefix,
@@ -939,8 +940,12 @@ static int sparse_checkout_clean(int argc, const char **argv,
struct strbuf full_path = STRBUF_INIT;
const char *msg = msg_remove;
size_t worktree_len;
+ int force = 0, dry_run = 0;
+ int require_force = 1;
struct option builtin_sparse_checkout_clean_options[] = {
+ OPT__DRY_RUN(&dry_run, N_("dry run")),
+ OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE),
OPT_END(),
};
@@ -954,6 +959,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
builtin_sparse_checkout_clean_options,
builtin_sparse_checkout_clean_usage, 0);
+ repo_config_get_bool(repo, "clean.requireforce", &require_force);
+ if (require_force && !force && !dry_run)
+ die(_("for safety, refusing to clean without one of --force or --dry-run"));
+
+ if (dry_run)
+ msg = msg_would_remove;
+
if (repo_read_index(repo) < 0)
die(_("failed to read index"));
@@ -977,7 +989,8 @@ static int sparse_checkout_clean(int argc, const char **argv,
printf(msg, ce->name);
- if (remove_dir_recursively(&full_path, 0))
+ if (dry_run <= 0 &&
+ remove_dir_recursively(&full_path, 0))
warning_errno(_("failed to remove '%s'"), ce->name);
}
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index bdb7b21e32..e6b768a8da 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1059,12 +1059,29 @@ test_expect_success 'clean' '
touch repo/deep/deeper2/file &&
touch repo/folder1/file &&
+ test_must_fail git -C repo sparse-checkout clean 2>err &&
+ grep "refusing to clean" err &&
+
+ git -C repo config clean.requireForce true &&
+ test_must_fail git -C repo sparse-checkout clean 2>err &&
+ grep "refusing to clean" err &&
+
+ cat >expect <<-\EOF &&
+ Would remove deep/deeper2/
+ Would remove folder1/
+ EOF
+
+ git -C repo sparse-checkout clean --dry-run >out &&
+ test_cmp expect out &&
+ test_path_exists repo/deep/deeper2 &&
+ test_path_exists repo/folder1 &&
+
cat >expect <<-\EOF &&
Removing deep/deeper2/
Removing folder1/
EOF
- git -C repo sparse-checkout clean >out &&
+ git -C repo sparse-checkout clean -f >out &&
test_cmp expect out &&
test_path_is_missing repo/deep/deeper2 &&
@@ -1076,6 +1093,10 @@ test_expect_success 'clean with sparse file states' '
git -C repo sparse-checkout set --cone deep/deeper1 &&
mkdir repo/folder2 &&
+ # The previous test case checked the -f option, so
+ # test the config option in this one.
+ git -C repo config clean.requireForce false &&
+
# create an untracked file and a modified file
touch repo/folder2/file &&
echo dirty >repo/folder2/a &&
@@ -1154,4 +1175,35 @@ test_expect_success 'clean with sparse file states' '
test_must_be_empty out
'
+test_expect_success 'clean with merge conflict status' '
+ git clone repo clean-merge &&
+
+ echo dirty >clean-merge/deep/deeper2/a &&
+ touch clean-merge/folder2/extra &&
+
+ cat >input <<-EOF &&
+ 0 $ZERO_OID folder1/a
+ 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a
+ EOF
+ git -C clean-merge update-index --index-info <input &&
+
+ git -C clean-merge sparse-checkout set deep/deeper1 &&
+
+ test_must_fail git -C clean-merge sparse-checkout clean -f 2>err &&
+ grep "failed to convert index to a sparse index" err &&
+
+ mkdir -p clean-merge/folder1/ &&
+ echo merged >clean-merge/folder1/a &&
+ git -C clean-merge add --sparse folder1/a &&
+
+ # deletes folder2/ but leaves staged change in folder1
+ # and dirty change in deep/deeper2/
+ cat >expect <<-\EOF &&
+ Removing folder2/
+ EOF
+
+ git -C clean-merge sparse-checkout clean -f >out &&
+ test_cmp expect out
+'
+
test_done
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* [PATCH v3 4/7] dir: add generic "walk all files" helper
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (2 preceding siblings ...)
2025-09-12 10:30 ` [PATCH v3 3/7] sparse-checkout: match some 'clean' behavior Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-09-12 10:30 ` [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean' Derrick Stolee via GitGitGadget
` (5 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
There is sometimes a need to visit every file within a directory,
recursively. The main example is remove_dir_recursively(), though it has
some extra flags that make it want to iterate over paths in a custom
way. There is also the fill_directory() approach but that involves an
index and a pathspec.
This change adds a new for_each_file_in_dir() method that will be
helpful in the next change.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
dir.c | 28 ++++++++++++++++++++++++++++
dir.h | 14 ++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/dir.c b/dir.c
index 71108ac79b..194b36a8c4 100644
--- a/dir.c
+++ b/dir.c
@@ -30,6 +30,7 @@
#include "read-cache-ll.h"
#include "setup.h"
#include "sparse-index.h"
+#include "strbuf.h"
#include "submodule-config.h"
#include "symlinks.h"
#include "trace2.h"
@@ -87,6 +88,33 @@ struct dirent *readdir_skip_dot_and_dotdot(DIR *dirp)
return e;
}
+int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data)
+{
+ struct dirent *e;
+ int res = 0;
+ size_t baselen = path->len;
+ DIR *dir = opendir(path->buf);
+
+ if (!dir)
+ return 0;
+
+ while (!res && (e = readdir_skip_dot_and_dotdot(dir)) != NULL) {
+ unsigned char dtype = get_dtype(e, path, 0);
+ strbuf_setlen(path, baselen);
+ strbuf_addstr(path, e->d_name);
+
+ if (dtype == DT_REG) {
+ res = fn(path->buf, data);
+ } else if (dtype == DT_DIR) {
+ strbuf_addch(path, '/');
+ res = for_each_file_in_dir(path, fn, data);
+ }
+ }
+
+ closedir(dir);
+ return res;
+}
+
int count_slashes(const char *s)
{
int cnt = 0;
diff --git a/dir.h b/dir.h
index fc9be7b427..20d4a078d6 100644
--- a/dir.h
+++ b/dir.h
@@ -536,6 +536,20 @@ int get_sparse_checkout_patterns(struct pattern_list *pl);
*/
int remove_dir_recursively(struct strbuf *path, int flag);
+/*
+ * This function pointer type is called on each file discovered in
+ * for_each_file_in_dir. The iteration stops if this method returns
+ * non-zero.
+ */
+typedef int (*file_iterator)(const char *path, const void *data);
+
+struct strbuf;
+/*
+ * Given a directory path, recursively visit each file within, including
+ * within subdirectories.
+ */
+int for_each_file_in_dir(struct strbuf *path, file_iterator fn, const void *data);
+
/*
* Tries to remove the path, along with leading empty directories so long as
* those empty directories are not startup_info->original_cwd. Ignores
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean'
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (3 preceding siblings ...)
2025-09-12 10:30 ` [PATCH v3 4/7] dir: add generic "walk all files" helper Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-09-15 18:09 ` Derrick Stolee
2025-09-12 10:30 ` [PATCH v3 6/7] sparse-index: point users to new 'clean' action Derrick Stolee via GitGitGadget
` (4 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
The 'git sparse-checkout clean' subcommand is focused on directories,
deleting any tracked sparse directories to clean up the worktree and
make the sparse index feature work optimally.
However, this directory-focused approach can leave users wondering why
those directories exist at all. In my experience, these files are left
over due to ignore or exclude patterns, Windows file handles, or
possibly merge conflict resolutions.
Add a new '--verbose' option for users to see all the files that are
being deleted (with '--force') or would be deleted (with '--dry-run').
Based on usage, users may request further context on this list of files for
states such as tracked/untracked, unstaged/staged/conflicted, etc.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/git-sparse-checkout.adoc | 5 +++++
builtin/sparse-checkout.c | 28 ++++++++++++++++++++++++--
t/t1091-sparse-checkout-builtin.sh | 14 ++++++++++---
3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/Documentation/git-sparse-checkout.adoc b/Documentation/git-sparse-checkout.adoc
index 42050ff5b5..113728a0e7 100644
--- a/Documentation/git-sparse-checkout.adoc
+++ b/Documentation/git-sparse-checkout.adoc
@@ -136,6 +136,11 @@ The `--dry-run` option will list the directories that would be removed
without deleting them. Running in this mode can be helpful to predict the
behavior of the clean comand or to determine which kinds of files are left
in the sparse directories.
++
+The `--verbose` option will list every file within the directories that
+are considered for removal. This option is helpful to determine if those
+files are actually important or perhaps to explain why the directory is
+still present despite the current sparse-checkout.
'disable'::
Disable the `core.sparseCheckout` config setting, and restore the
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index d777b64960..8d3c3485f5 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -930,6 +930,26 @@ static char const * const builtin_sparse_checkout_clean_usage[] = {
NULL
};
+static int list_file_iterator(const char *path, const void *data)
+{
+ const char *msg = data;
+
+ printf(msg, path);
+ return 0;
+}
+
+static void list_every_file_in_dir(const char *msg,
+ const char *directory)
+{
+ struct strbuf path = STRBUF_INIT;
+
+ strbuf_addstr(&path, directory);
+ fprintf(stderr, "list every file in %s\n", directory);
+
+ for_each_file_in_dir(&path, list_file_iterator, msg);
+ strbuf_release(&path);
+}
+
static const char *msg_remove = N_("Removing %s\n");
static const char *msg_would_remove = N_("Would remove %s\n");
@@ -940,12 +960,13 @@ static int sparse_checkout_clean(int argc, const char **argv,
struct strbuf full_path = STRBUF_INIT;
const char *msg = msg_remove;
size_t worktree_len;
- int force = 0, dry_run = 0;
+ int force = 0, dry_run = 0, verbose = 0;
int require_force = 1;
struct option builtin_sparse_checkout_clean_options[] = {
OPT__DRY_RUN(&dry_run, N_("dry run")),
OPT__FORCE(&force, N_("force"), PARSE_OPT_NOCOMPLETE),
+ OPT__VERBOSE(&verbose, N_("report each affected file, not just directories")),
OPT_END(),
};
@@ -987,7 +1008,10 @@ static int sparse_checkout_clean(int argc, const char **argv,
if (!is_directory(full_path.buf))
continue;
- printf(msg, ce->name);
+ if (verbose)
+ list_every_file_in_dir(msg, ce->name);
+ else
+ printf(msg, ce->name);
if (dry_run <= 0 &&
remove_dir_recursively(&full_path, 0))
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index e6b768a8da..7b15fa669c 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1053,11 +1053,11 @@ test_expect_success 'check-rules null termination' '
test_expect_success 'clean' '
git -C repo sparse-checkout set --cone deep/deeper1 &&
git -C repo sparse-checkout reapply &&
- mkdir repo/deep/deeper2 repo/folder1 &&
+ mkdir -p repo/deep/deeper2 repo/folder1/extra/inside &&
# Add untracked files
touch repo/deep/deeper2/file &&
- touch repo/folder1/file &&
+ touch repo/folder1/extra/inside/file &&
test_must_fail git -C repo sparse-checkout clean 2>err &&
grep "refusing to clean" err &&
@@ -1074,7 +1074,15 @@ test_expect_success 'clean' '
git -C repo sparse-checkout clean --dry-run >out &&
test_cmp expect out &&
test_path_exists repo/deep/deeper2 &&
- test_path_exists repo/folder1 &&
+ test_path_exists repo/folder1/extra/inside/file &&
+
+ cat >expect <<-\EOF &&
+ Would remove deep/deeper2/file
+ Would remove folder1/extra/inside/file
+ EOF
+
+ git -C repo sparse-checkout clean --dry-run --verbose >out &&
+ test_cmp expect out &&
cat >expect <<-\EOF &&
Removing deep/deeper2/
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean'
2025-09-12 10:30 ` [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean' Derrick Stolee via GitGitGadget
@ 2025-09-15 18:09 ` Derrick Stolee
2025-09-15 19:12 ` Junio C Hamano
0 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee @ 2025-09-15 18:09 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget, git; +Cc: gitster, newren, Patrick Steinhardt
On 9/12/2025 6:30 AM, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <stolee@gmail.com>
> +static void list_every_file_in_dir(const char *msg,
> + const char *directory)
> +{
> + struct strbuf path = STRBUF_INIT;
> +
> + strbuf_addstr(&path, directory);
> + fprintf(stderr, "list every file in %s\n", directory);
I don't know how I missed that this debugging output line snuck
in and stayed through my testing. This line should be removed.
> + for_each_file_in_dir(&path, list_file_iterator, msg);
> + strbuf_release(&path);
> +}
> +
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean'
2025-09-15 18:09 ` Derrick Stolee
@ 2025-09-15 19:12 ` Junio C Hamano
2025-09-16 2:00 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Junio C Hamano @ 2025-09-15 19:12 UTC (permalink / raw)
To: Derrick Stolee
Cc: Derrick Stolee via GitGitGadget, git, newren, Patrick Steinhardt
Derrick Stolee <stolee@gmail.com> writes:
> On 9/12/2025 6:30 AM, Derrick Stolee via GitGitGadget wrote:
>> From: Derrick Stolee <stolee@gmail.com>
>
>> +static void list_every_file_in_dir(const char *msg,
>> + const char *directory)
>> +{
>> + struct strbuf path = STRBUF_INIT;
>> +
>> + strbuf_addstr(&path, directory);
>> + fprintf(stderr, "list every file in %s\n", directory);
>
> I don't know how I missed that this debugging output line snuck
> in and stayed through my testing. This line should be removed.
>
>> + for_each_file_in_dir(&path, list_file_iterator, msg);
>> + strbuf_release(&path);
>> +}
;-) Don't feel bad. Nobody among other people caught it either.
Locally amended so no need to resubmit only to fix this.
Thanks.
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean'
2025-09-15 19:12 ` Junio C Hamano
@ 2025-09-16 2:00 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-09-16 2:00 UTC (permalink / raw)
To: Junio C Hamano
Cc: Derrick Stolee via GitGitGadget, git, newren, Patrick Steinhardt
On 9/15/2025 3:12 PM, Junio C Hamano wrote:
> Derrick Stolee <stolee@gmail.com> writes:
>
>> On 9/12/2025 6:30 AM, Derrick Stolee via GitGitGadget wrote:
>>> From: Derrick Stolee <stolee@gmail.com>
>>
>>> +static void list_every_file_in_dir(const char *msg,
>>> + const char *directory)
>>> +{
>>> + struct strbuf path = STRBUF_INIT;
>>> +
>>> + strbuf_addstr(&path, directory);
>>> + fprintf(stderr, "list every file in %s\n", directory);
>>
>> I don't know how I missed that this debugging output line snuck
>> in and stayed through my testing. This line should be removed.
>>
>>> + for_each_file_in_dir(&path, list_file_iterator, msg);
>>> + strbuf_release(&path);
>>> +}
>
> ;-) Don't feel bad. Nobody among other people caught it either.
>
> Locally amended so no need to resubmit only to fix this.
Thanks!
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v3 6/7] sparse-index: point users to new 'clean' action
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (4 preceding siblings ...)
2025-09-12 10:30 ` [PATCH v3 5/7] sparse-checkout: add --verbose option to 'clean' Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-10-07 22:53 ` Elijah Newren
2025-09-12 10:30 ` [PATCH v3 7/7] t: expand tests around sparse merges and clean Derrick Stolee via GitGitGadget
` (3 subsequent siblings)
9 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
In my experience, the most-common reason that the sparse index must
expand to a full one is because there is some leftover file in a tracked
directory that is now outside of the sparse-checkout. The new 'git
sparse-checkout clean' command will find and delete these directories,
so point users to it when they hit the sparse index expansion advice.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
sparse-index.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sparse-index.c b/sparse-index.c
index 5634abafaa..5d14795063 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -32,7 +32,8 @@ int give_advice_on_expansion = 1;
"Your working directory likely has contents that are outside of\n" \
"your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
"see your sparse-checkout definition and compare it to your working\n" \
- "directory contents. Running 'git clean' may assist in this cleanup."
+ "directory contents. Running 'git sparse-checkout clean' may assist\n" \
+ "in this cleanup."
struct modify_index_context {
struct index_state *write;
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v3 6/7] sparse-index: point users to new 'clean' action
2025-09-12 10:30 ` [PATCH v3 6/7] sparse-index: point users to new 'clean' action Derrick Stolee via GitGitGadget
@ 2025-10-07 22:53 ` Elijah Newren
2025-10-20 14:17 ` Derrick Stolee
0 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-10-07 22:53 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Fri, Sep 12, 2025 at 3:30 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> In my experience, the most-common reason that the sparse index must
> expand to a full one is because there is some leftover file in a tracked
> directory that is now outside of the sparse-checkout. The new 'git
> sparse-checkout clean' command will find and delete these directories,
> so point users to it when they hit the sparse index expansion advice.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> sparse-index.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/sparse-index.c b/sparse-index.c
> index 5634abafaa..5d14795063 100644
> --- a/sparse-index.c
> +++ b/sparse-index.c
> @@ -32,7 +32,8 @@ int give_advice_on_expansion = 1;
> "Your working directory likely has contents that are outside of\n" \
> "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
> "see your sparse-checkout definition and compare it to your working\n" \
> - "directory contents. Running 'git clean' may assist in this cleanup."
> + "directory contents. Running 'git sparse-checkout clean' may assist\n" \
> + "in this cleanup."
Given that you dropped patch 8 and explicitly call out in the
documentation of `git sparse-checkout clean` that it alone is not
sufficient to do the cleanup, should this advice be calling out a
combination of `git sparse-checkout clean` and `git sparse-checkout
reapply` ? (Should it also suggest an order for running those two; I
seem to recall that the order mattered, but can't recall which one
needs to run first or if it is situation dependent.)
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v3 6/7] sparse-index: point users to new 'clean' action
2025-10-07 22:53 ` Elijah Newren
@ 2025-10-20 14:17 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-10-20 14:17 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 10/7/25 6:53 PM, Elijah Newren wrote:
> On Fri, Sep 12, 2025 at 3:30 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Derrick Stolee <stolee@gmail.com>
>>
>> In my experience, the most-common reason that the sparse index must
>> expand to a full one is because there is some leftover file in a tracked
>> directory that is now outside of the sparse-checkout. The new 'git
>> sparse-checkout clean' command will find and delete these directories,
>> so point users to it when they hit the sparse index expansion advice.
>>
>> Signed-off-by: Derrick Stolee <stolee@gmail.com>
>> ---
>> sparse-index.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/sparse-index.c b/sparse-index.c
>> index 5634abafaa..5d14795063 100644
>> --- a/sparse-index.c
>> +++ b/sparse-index.c
>> @@ -32,7 +32,8 @@ int give_advice_on_expansion = 1;
>> "Your working directory likely has contents that are outside of\n" \
>> "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
>> "see your sparse-checkout definition and compare it to your working\n" \
>> - "directory contents. Running 'git clean' may assist in this cleanup."
>> + "directory contents. Running 'git sparse-checkout clean' may assist\n" \
>> + "in this cleanup."
>
> Given that you dropped patch 8 and explicitly call out in the
> documentation of `git sparse-checkout clean` that it alone is not
> sufficient to do the cleanup, should this advice be calling out a
> combination of `git sparse-checkout clean` and `git sparse-checkout
> reapply` ? (Should it also suggest an order for running those two; I
> seem to recall that the order mattered, but can't recall which one
> needs to run first or if it is situation dependent.)
I'll expand this advice in an upcoming patch 8.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH v3 7/7] t: expand tests around sparse merges and clean
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (5 preceding siblings ...)
2025-09-12 10:30 ` [PATCH v3 6/7] sparse-index: point users to new 'clean' action Derrick Stolee via GitGitGadget
@ 2025-09-12 10:30 ` Derrick Stolee via GitGitGadget
2025-09-12 16:12 ` [PATCH v3 0/7] sparse-checkout: add 'clean' command Junio C Hamano
` (2 subsequent siblings)
9 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2025-09-12 10:30 UTC (permalink / raw)
To: git; +Cc: gitster, newren, Patrick Steinhardt, Derrick Stolee,
Derrick Stolee
From: Derrick Stolee <stolee@gmail.com>
With the current implementation of 'git sparse-checkout clean', we
notice that a file that was in a conflicted state does not get cleaned
up because of some internal details around the SKIP_WORKTREE bit.
This test is documenting the current behavior before we update it in the
following change.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
t/t1091-sparse-checkout-builtin.sh | 56 ++++++++++++++++++------------
1 file changed, 34 insertions(+), 22 deletions(-)
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 7b15fa669c..b2da4feaef 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -1183,35 +1183,47 @@ test_expect_success 'clean with sparse file states' '
test_must_be_empty out
'
-test_expect_success 'clean with merge conflict status' '
- git clone repo clean-merge &&
+test_expect_success 'sparse-checkout operations with merge conflicts' '
+ git clone repo merge &&
- echo dirty >clean-merge/deep/deeper2/a &&
- touch clean-merge/folder2/extra &&
+ (
+ cd merge &&
+ mkdir -p folder1/even/more/dirs &&
+ echo base >folder1/even/more/dirs/file &&
+ git add folder1 &&
+ git commit -m "base" &&
- cat >input <<-EOF &&
- 0 $ZERO_OID folder1/a
- 100644 $(git -C clean-merge rev-parse HEAD:folder1/a) 1 folder1/a
- EOF
- git -C clean-merge update-index --index-info <input &&
+ git checkout -b right&&
+ echo right >folder1/even/more/dirs/file &&
+ git commit -a -m "right" &&
- git -C clean-merge sparse-checkout set deep/deeper1 &&
+ git checkout -b left HEAD~1 &&
+ echo left >folder1/even/more/dirs/file &&
+ git commit -a -m "left" &&
- test_must_fail git -C clean-merge sparse-checkout clean -f 2>err &&
- grep "failed to convert index to a sparse index" err &&
+ git checkout -b merge &&
+ git sparse-checkout set deep/deeper1 &&
- mkdir -p clean-merge/folder1/ &&
- echo merged >clean-merge/folder1/a &&
- git -C clean-merge add --sparse folder1/a &&
+ test_must_fail git merge -m "will-conflict" right &&
- # deletes folder2/ but leaves staged change in folder1
- # and dirty change in deep/deeper2/
- cat >expect <<-\EOF &&
- Removing folder2/
- EOF
+ test_must_fail git sparse-checkout clean -f 2>err &&
+ grep "failed to convert index to a sparse index" err &&
- git -C clean-merge sparse-checkout clean -f >out &&
- test_cmp expect out
+ echo merged >folder1/even/more/dirs/file &&
+ git add --sparse folder1 &&
+ git merge --continue &&
+
+ test_path_exists folder1/even/more/dirs/file &&
+
+ # clean does not remove the file, because the
+ # SKIP_WORKTREE bit was not cleared by the merge command.
+ git sparse-checkout clean -f >out &&
+ test_line_count = 0 out &&
+ test_path_exists folder1/even/more/dirs/file &&
+
+ git sparse-checkout reapply &&
+ test_path_is_missing folder1
+ )
'
test_done
--
gitgitgadget
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH v3 0/7] sparse-checkout: add 'clean' command
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (6 preceding siblings ...)
2025-09-12 10:30 ` [PATCH v3 7/7] t: expand tests around sparse merges and clean Derrick Stolee via GitGitGadget
@ 2025-09-12 16:12 ` Junio C Hamano
2025-09-26 13:40 ` Derrick Stolee
2025-10-07 23:07 ` Elijah Newren
2025-10-20 14:24 ` [PATCH 8/8] sparse-index: improve advice message instructions Derrick Stolee
9 siblings, 1 reply; 69+ messages in thread
From: Junio C Hamano @ 2025-09-12 16:12 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, newren, Patrick Steinhardt, Derrick Stolee
"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
> NEW: This series is rebased on a recent master to remove dependence on the
> updates to the global variables used by the sparse-checkout system.
>
> When using cone-mode sparse-checkout, users specify which tracked
> directories they want (recursively) and any directory not part of the parent
> paths for those directories are considered "out of scope". When changing
> sparse-checkouts, there are a variety of reasons why these "out of scope"
> directories could remain, including:
>
> * The user has .gitignore or .git/info/exclude files that tell Git to not
> remove files of a certain type.
> * Some filesystem blocker prevented the removal of a tracked file. This is
> usually more of an issue on Windows where a read handle will block file
> deletion.
The updated documentation was easeier to follow (even though I had a
"Huh?" moment with "Opportunistically" a bit). Comparing with the
previous version (with my rebase to get rid of the dependence on the
other topic) and this one, I see a few more code paths have learned
to pass "struct repository *" pointers throughout the callchain,
which is very nice.
Will replace. Thanks.
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v3 0/7] sparse-checkout: add 'clean' command
2025-09-12 16:12 ` [PATCH v3 0/7] sparse-checkout: add 'clean' command Junio C Hamano
@ 2025-09-26 13:40 ` Derrick Stolee
2025-09-26 18:58 ` Elijah Newren
0 siblings, 1 reply; 69+ messages in thread
From: Derrick Stolee @ 2025-09-26 13:40 UTC (permalink / raw)
To: Junio C Hamano, Derrick Stolee via GitGitGadget
Cc: git, newren, Patrick Steinhardt
On 9/12/2025 12:12 PM, Junio C Hamano wrote:
> "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> NEW: This series is rebased on a recent master to remove dependence on the
>> updates to the global variables used by the sparse-checkout system.
>>
>> When using cone-mode sparse-checkout, users specify which tracked
>> directories they want (recursively) and any directory not part of the parent
>> paths for those directories are considered "out of scope". When changing
>> sparse-checkouts, there are a variety of reasons why these "out of scope"
>> directories could remain, including:
>>
>> * The user has .gitignore or .git/info/exclude files that tell Git to not
>> remove files of a certain type.
>> * Some filesystem blocker prevented the removal of a tracked file. This is
>> usually more of an issue on Windows where a read handle will block file
>> deletion.
>
> The updated documentation was easeier to follow (even though I had a
> "Huh?" moment with "Opportunistically" a bit). Comparing with the
> previous version (with my rebase to get rid of the dependence on the
> other topic) and this one, I see a few more code paths have learned
> to pass "struct repository *" pointers throughout the callchain,
> which is very nice.
I'm hoping to see some feedback from Elijah whose feedback on v2 was
very helpful. Here is a ping to see if he's available.
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v3 0/7] sparse-checkout: add 'clean' command
2025-09-26 13:40 ` Derrick Stolee
@ 2025-09-26 18:58 ` Elijah Newren
0 siblings, 0 replies; 69+ messages in thread
From: Elijah Newren @ 2025-09-26 18:58 UTC (permalink / raw)
To: Derrick Stolee
Cc: Junio C Hamano, Derrick Stolee via GitGitGadget, git,
Patrick Steinhardt
On Fri, Sep 26, 2025 at 6:40 AM Derrick Stolee <stolee@gmail.com> wrote:
>
> > The updated documentation was easeier to follow (even though I had a
> > "Huh?" moment with "Opportunistically" a bit). Comparing with the
> > previous version (with my rebase to get rid of the dependence on the
> > other topic) and this one, I see a few more code paths have learned
> > to pass "struct repository *" pointers throughout the callchain,
> > which is very nice.
>
> I'm hoping to see some feedback from Elijah whose feedback on v2 was
> very helpful. Here is a ping to see if he's available.
Sorry for the delay; I saw the series and I've been meaning to review,
but my daughter's hospitalization a few weeks ago (she recovered and
is doing fine now) put me behind on several things. I should have
some time to take a look next week after the Git Merge conference.
^ permalink raw reply [flat|nested] 69+ messages in thread
* Re: [PATCH v3 0/7] sparse-checkout: add 'clean' command
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (7 preceding siblings ...)
2025-09-12 16:12 ` [PATCH v3 0/7] sparse-checkout: add 'clean' command Junio C Hamano
@ 2025-10-07 23:07 ` Elijah Newren
2025-10-20 14:25 ` Derrick Stolee
2025-10-20 14:24 ` [PATCH 8/8] sparse-index: improve advice message instructions Derrick Stolee
9 siblings, 1 reply; 69+ messages in thread
From: Elijah Newren @ 2025-10-07 23:07 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt, Derrick Stolee
On Fri, Sep 12, 2025 at 3:30 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> NEW: This series is rebased on a recent master to remove dependence on the
> updates to the global variables used by the sparse-checkout system.
>
> When using cone-mode sparse-checkout, users specify which tracked
> directories they want (recursively) and any directory not part of the parent
> paths for those directories are considered "out of scope". When changing
> sparse-checkouts, there are a variety of reasons why these "out of scope"
> directories could remain, including:
>
> * The user has .gitignore or .git/info/exclude files that tell Git to not
> remove files of a certain type.
> * Some filesystem blocker prevented the removal of a tracked file. This is
> usually more of an issue on Windows where a read handle will block file
> deletion.
>
> Typically, this would not mean too much for the user experience. A few extra
> filesystem checks might be required to satisfy git status commands, but the
> scope of the performance hit is relative to how many cruft files are left
> over in this situation.
>
> However, when using the sparse index, these tracked sparse directories cause
> significant performance issues. When noticing that the index contains a
> sparse directory but that directory exists on disk, Git needs to expand that
> sparse directory to determine which files are tracked or untracked. The
> current mechanism expands the entire index to a full one, an expensive
> operation that scales with the total number of paths at HEAD and not just
> the number of cruft files left over.
>
> Advice was added in 9479a31d603 (advice: warn when sparse index expands,
> 2024-07-08) to help users determine that they were in this state. However,
> the advice doesn't actually recommend helpful ways to get out of this state.
> Recommending "git clean" on its own is incomplete, as typically users
> actually need 'git clean -dfx' to clear out the ignored or excluded files.
> Even then, they may need 'git sparse-checkout reapply' afterwards to clear
> the sparse directories.
>
> The advice was successful in helping to alert users to the problem, which is
> how I got wind of many of these cases for how users get into this state.
> It's now time to give them a tool that helps them out of this state.
...in v2, I found some cases where the tool doesn't help them get out
of this state. In v3, you documented those cases, and didn't attempt
to provide a combined tool. I'm a little disappointed at the
end-state, because it means we tell users to use a combination of
commands, and they may have to figure out the order to run those
commands in. However, I think with the documentation you've got,
we've at least improved on the status quo, so we could always make
further improvements later.
There was an error message and an advice message that I think could be
touched up to improve on this (commented on both in v3), otherwise I
think this series is good enough to merge down.
[...]
> This option would be preferred to something like 'git clean -dfx' since it
> does not clear the excluded files that are still within the sparse-checkout.
> Instead, it performs the exact filesystem operations required to refresh the
> sparse index performance back to what is expected.
This paragraph is the same from v2 of the cover letter, but we know
this paragraph to be false -- the new command only works in a subset
of applicable cases, otherwise an additional command (sparse-checkout
reapply) is also needed. So, it feels like this paragraph should be
updated.
> Updates in V3
> =============
>
> Huge thanks to Elijah for such a detailed review. Apologies for the delay in
> responding.
Likewise...it's nearly been a month since you sent this. :-(
> * Removed dependency on stalled series around updating the sparse-checkout
> globals.
> * Commit message and documentation is updated to better describe the
> conditions that qualify a file or directory for removal.
> * Tests are expanded significantly to include special cases and
> aftereffects.
> * A note is added around possible future expansion of the --verbose option
> to include more detailed status information on the files that would be
> deleted.
All much appreciated.
> * Due to a situation where a file appears as "modified and deleted" after
> the more aggressive updating of the tree, the previous patch 8 is removed
> (for now). I may reconsider and send a version in the future that avoids
> this issue. Tests from the earlier patches are more expanded in such a
> way that the aggressive implementation requires test changes that reveal
> this problem. See [1] for a copy of this change and how it impacts the
> latest tests.
Yeah, I think I was hoping that patch 8 would instead be modified to
handle the additional cases (or more patches added to make it all work
out), but punting that for future work seems viable too.
In summary, I think this series is close to ready to merge, but I
think a couple wording improvements to an error message and advice
message that I called out in separate emails on this series makes
sense to fix up first.
Thanks for working on this!
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH v3 0/7] sparse-checkout: add 'clean' command
2025-10-07 23:07 ` Elijah Newren
@ 2025-10-20 14:25 ` Derrick Stolee
0 siblings, 0 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-10-20 14:25 UTC (permalink / raw)
To: Elijah Newren, Derrick Stolee via GitGitGadget
Cc: git, gitster, Patrick Steinhardt
On 10/7/25 7:07 PM, Elijah Newren wrote:
> On Fri, Sep 12, 2025 at 3:30 AM Derrick Stolee via GitGitGadget
>> Updates in V3
>> =============
>>
>> Huge thanks to Elijah for such a detailed review. Apologies for the delay in
>> responding.
>
> Likewise...it's nearly been a month since you sent this. :-(
It's my turn to be late in responding. :(
> In summary, I think this series is close to ready to merge, but I
> think a couple wording improvements to an error message and advice
> message that I called out in separate emails on this series makes
> sense to fix up first.
I sent a patch 8 to the v3 thread [1] that includes an update to
the error message. This could be squashed, but I sent an extra patch
to avoid a reroll of 'next'.
[1] https://lore.kernel.org/git/a34cc559-5823-4e68-8f3f-07c182f7299b@gmail.com/
Thanks,
-Stolee
^ permalink raw reply [flat|nested] 69+ messages in thread
* [PATCH 8/8] sparse-index: improve advice message instructions
2025-09-12 10:30 ` [PATCH v3 0/7] " Derrick Stolee via GitGitGadget
` (8 preceding siblings ...)
2025-10-07 23:07 ` Elijah Newren
@ 2025-10-20 14:24 ` Derrick Stolee
2025-10-20 16:29 ` Junio C Hamano
2025-10-24 2:22 ` Elijah Newren
9 siblings, 2 replies; 69+ messages in thread
From: Derrick Stolee @ 2025-10-20 14:24 UTC (permalink / raw)
To: Derrick Stolee via GitGitGadget, git; +Cc: gitster, newren, Patrick Steinhardt
From 0ee829fea73d495dd32deda4553ea00f9299c701 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <stolee@gmail.com>
Date: Mon, 20 Oct 2025 10:19:22 -0400
Subject: [PATCH 8/8] sparse-index: improve advice message instructions
When an on-disk sparse index is expanded to a full one, this could be due to
some worktree state that requires looking at file entries hidden within
sparse tree entries. These can be avoided if the worktree is cleaned up and
some other issues related to the index state. Expand the advice message to
include all of these cases, since 'git sparse-checkout clean' is not
currently capable of handling all cases.
In the future, we may improve the behavior of 'git sparse-checkout clean' to
handle all of the cases.
Helped-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Here is an add-on patch to add to this series to hopefully satisfy
Elijah's feedback. Sorry it took so long to be able to get back to
this!
-Stolee
sparse-index.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/sparse-index.c b/sparse-index.c
index 5d14795063b..76f90da5f5f 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -32,8 +32,9 @@ int give_advice_on_expansion = 1;
"Your working directory likely has contents that are outside of\n" \
"your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
"see your sparse-checkout definition and compare it to your working\n" \
- "directory contents. Running 'git sparse-checkout clean' may assist\n" \
- "in this cleanup."
+ "directory contents. Cleaning up any merge conflicts or staged\n" \
+ "changes before running 'git sparse-checkout clean' or 'git\n" \
+ "sparse-checkout reapply' may assist in this cleanup."
struct modify_index_context {
struct index_state *write;
--
2.47.0.vfs.0.3
^ permalink raw reply related [flat|nested] 69+ messages in thread* Re: [PATCH 8/8] sparse-index: improve advice message instructions
2025-10-20 14:24 ` [PATCH 8/8] sparse-index: improve advice message instructions Derrick Stolee
@ 2025-10-20 16:29 ` Junio C Hamano
2025-10-24 2:22 ` Elijah Newren
1 sibling, 0 replies; 69+ messages in thread
From: Junio C Hamano @ 2025-10-20 16:29 UTC (permalink / raw)
To: Derrick Stolee
Cc: Derrick Stolee via GitGitGadget, git, newren, Patrick Steinhardt
Derrick Stolee <stolee@gmail.com> writes:
I am getting
warning: Patch sent with format=flowed; space at the end of lines might be lost.
Applying: sparse-index: improve advice message instructions
but hopefully the result is correct.
> From 0ee829fea73d495dd32deda4553ea00f9299c701 Mon Sep 17 00:00:00 2001
> From: Derrick Stolee <stolee@gmail.com>
> Date: Mon, 20 Oct 2025 10:19:22 -0400
> Subject: [PATCH 8/8] sparse-index: improve advice message instructions
I will strip the above out with "commit --amend"
> When an on-disk sparse index is expanded to a full one, this could be due to
> some worktree state that requires looking at file entries hidden within
> sparse tree entries.
I would find it smoother to read with "this could be" -> "it could
be", but that would be just me.
> These can be avoided if the worktree is cleaned up and
> some other issues related to the index state.
Now "These" confused me. Does it refer to the same thing as the
previous sentence refers to with "this"? Also, I can understand up
to "if the worktree is cleaned up", but the rest of the sentence
does not quite parse for me. It may be that we are missing " are
resolved" between "state" and the full stop?
Even though it would leave readers in suspense to know what "some
other issues" are, it is answered by reading the message updated by
the patch, so it is OK ;-)
> Expand the advice message to
> include all of these cases, since 'git sparse-checkout clean' is not
> currently capable of handling all cases.
>
> In the future, we may improve the behavior of 'git sparse-checkout clean' to
> handle all of the cases.
>
> Helped-by: Elijah Newren <newren@gmail.com>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
> Here is an add-on patch to add to this series to hopefully satisfy
> Elijah's feedback. Sorry it took so long to be able to get back to
> this!
Great. Thanks, both.
If the title were numbered [8/7], that would have been even nicer,
but I was following the discussion this time, so it was not a
surprise to me to see only [8/8] in my mailbox.
> diff --git a/sparse-index.c b/sparse-index.c
> index 5d14795063b..76f90da5f5f 100644
> --- a/sparse-index.c
> +++ b/sparse-index.c
> @@ -32,8 +32,9 @@ int give_advice_on_expansion = 1;
> "Your working directory likely has contents that are outside of\n" \
> "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
> "see your sparse-checkout definition and compare it to your working\n" \
> - "directory contents. Running 'git sparse-checkout clean' may assist\n" \
> - "in this cleanup."
> + "directory contents. Cleaning up any merge conflicts or staged\n" \
> + "changes before running 'git sparse-checkout clean' or 'git\n" \
> + "sparse-checkout reapply' may assist in this cleanup."
>
> struct modify_index_context {
> struct index_state *write;
^ permalink raw reply [flat|nested] 69+ messages in thread* Re: [PATCH 8/8] sparse-index: improve advice message instructions
2025-10-20 14:24 ` [PATCH 8/8] sparse-index: improve advice message instructions Derrick Stolee
2025-10-20 16:29 ` Junio C Hamano
@ 2025-10-24 2:22 ` Elijah Newren
1 sibling, 0 replies; 69+ messages in thread
From: Elijah Newren @ 2025-10-24 2:22 UTC (permalink / raw)
To: Derrick Stolee
Cc: Derrick Stolee via GitGitGadget, git, gitster, Patrick Steinhardt
On Mon, Oct 20, 2025 at 10:24 AM Derrick Stolee <stolee@gmail.com> wrote:
>
> From 0ee829fea73d495dd32deda4553ea00f9299c701 Mon Sep 17 00:00:00 2001
> From: Derrick Stolee <stolee@gmail.com>
> Date: Mon, 20 Oct 2025 10:19:22 -0400
> Subject: [PATCH 8/8] sparse-index: improve advice message instructions
>
> When an on-disk sparse index is expanded to a full one, this could be due to
> some worktree state that requires looking at file entries hidden within
> sparse tree entries. These can be avoided if the worktree is cleaned up and
> some other issues related to the index state. Expand the advice message to
> include all of these cases, since 'git sparse-checkout clean' is not
> currently capable of handling all cases.
This paragraph feels slightly clumsy or awkward to parse, but...
>
> In the future, we may improve the behavior of 'git sparse-checkout clean' to
> handle all of the cases.
>
> Helped-by: Elijah Newren <newren@gmail.com>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
>
> Here is an add-on patch to add to this series to hopefully satisfy
> Elijah's feedback. Sorry it took so long to be able to get back to
> this!
>
> -Stolee
>
>
> sparse-index.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/sparse-index.c b/sparse-index.c
> index 5d14795063b..76f90da5f5f 100644
> --- a/sparse-index.c
> +++ b/sparse-index.c
> @@ -32,8 +32,9 @@ int give_advice_on_expansion = 1;
> "Your working directory likely has contents that are outside of\n" \
> "your sparse-checkout patterns. Use 'git sparse-checkout list' to\n" \
> "see your sparse-checkout definition and compare it to your working\n" \
> - "directory contents. Running 'git sparse-checkout clean' may assist\n" \
> - "in this cleanup."
> + "directory contents. Cleaning up any merge conflicts or staged\n" \
> + "changes before running 'git sparse-checkout clean' or 'git\n" \
> + "sparse-checkout reapply' may assist in this cleanup."
I like the message change; thanks for sending this in!
>
> struct modify_index_context {
> struct index_state *write;
> --
> 2.47.0.vfs.0.3
^ permalink raw reply [flat|nested] 69+ messages in thread