* [PATCH 3/7] setup: remove global `git_work_tree_cfg` variable
From: Patrick Steinhardt @ 2026-06-10 6:56 UTC (permalink / raw)
To: git
In-Reply-To: <20260610-b4-pks-setup-drop-global-state-v1-0-5dff3eec8f06@pks.im>
The global `git_work_tree_cfg` variable used to be modified by both
"setup.c" and by "builtin/init-db.c". We have refactored the latter user
to not use that variable at all anymore in a preceding commit, which
makes "setup.c" the only remaining user.
Even for "setup.c" it is unnecessary though, as we only ever set it to
the value we have stored in the discovered repository format. The
consequence is that we only ever set it in case we already have it set
to the same value in our discovered repository format, which makes it
redundant.
Refactor the code so that we instead use the worktree configuration as
discovered via the repository format. Drop the global variable.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
setup.c | 28 +++++++++++-----------------
1 file changed, 11 insertions(+), 17 deletions(-)
diff --git a/setup.c b/setup.c
index 52228b42a1..71fc6b33da 100644
--- a/setup.c
+++ b/setup.c
@@ -31,9 +31,6 @@ enum allowed_bare_repo {
ALLOWED_BARE_REPO_ALL,
};
-/* This is set by setup_git_directory_gently() and/or git_default_config() */
-static char *git_work_tree_cfg;
-
static struct startup_info the_startup_info;
struct startup_info *startup_info = &the_startup_info;
const char *tmp_original_cwd;
@@ -799,13 +796,10 @@ static int check_repository_format_gently(const char *gitdir,
}
if (!has_common) {
- if (candidate->is_bare != -1) {
+ if (candidate->is_bare != -1)
is_bare_repository_cfg = candidate->is_bare;
- }
- if (candidate->work_tree) {
- free(git_work_tree_cfg);
- git_work_tree_cfg = xstrdup(candidate->work_tree);
- }
+ } else {
+ FREE_AND_NULL(candidate->work_tree);
}
return 0;
@@ -1145,7 +1139,7 @@ static const char *setup_explicit_git_dir(struct repository *repo,
if (work_tree_env)
set_git_work_tree(repo, work_tree_env);
else if (is_bare_repository_cfg > 0) {
- if (git_work_tree_cfg) {
+ if (repo_fmt->work_tree) {
/* #22.2, #30 */
warning("core.bare and core.worktree do not make sense");
repo->worktree_config_is_bogus = true;
@@ -1156,15 +1150,15 @@ static const char *setup_explicit_git_dir(struct repository *repo,
free(gitfile);
return NULL;
}
- else if (git_work_tree_cfg) { /* #6, #14 */
- if (is_absolute_path(git_work_tree_cfg))
- set_git_work_tree(repo, git_work_tree_cfg);
+ else if (repo_fmt->work_tree) { /* #6, #14 */
+ if (is_absolute_path(repo_fmt->work_tree))
+ set_git_work_tree(repo, repo_fmt->work_tree);
else {
char *core_worktree;
if (chdir(gitdirenv))
die_errno(_("cannot chdir to '%s'"), gitdirenv);
- if (chdir(git_work_tree_cfg))
- die_errno(_("cannot chdir to '%s'"), git_work_tree_cfg);
+ if (chdir(repo_fmt->work_tree))
+ die_errno(_("cannot chdir to '%s'"), repo_fmt->work_tree);
core_worktree = xgetcwd();
if (chdir(cwd->buf))
die_errno(_("cannot come back to cwd"));
@@ -1217,7 +1211,7 @@ static const char *setup_discovered_git_dir(struct repository *repo,
return NULL;
/* --work-tree is set without --git-dir; use discovered one */
- if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) {
+ if (getenv(GIT_WORK_TREE_ENVIRONMENT) || repo_fmt->work_tree) {
char *to_free = NULL;
const char *ret;
@@ -1267,7 +1261,7 @@ static const char *setup_bare_git_dir(struct repository *repo,
setenv(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, "0", 1);
/* --work-tree is set without --git-dir; use discovered one */
- if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) {
+ if (getenv(GIT_WORK_TREE_ENVIRONMENT) || repo_fmt->work_tree) {
static const char *gitdir;
gitdir = offset == cwd->len ? "." : xmemdupz(cwd->buf, offset);
--
2.54.0.1189.g8c84645362.dirty
^ permalink raw reply related
* [PATCH 2/7] builtin/init: simplify logic to configure worktree
From: Patrick Steinhardt @ 2026-06-10 6:56 UTC (permalink / raw)
To: git
In-Reply-To: <20260610-b4-pks-setup-drop-global-state-v1-0-5dff3eec8f06@pks.im>
In the preceding commit we have stopped modifying the global
`git_work_tree_cfg` variable. With this change there's now some code
paths where we end up setting the local `git_work_tree_cfg` variable,
but without actually using the value for anything.
Refactor the code a bit so that we only set the worktree configuration
in case it's actually needed. Furthermore, reflow it a bit to make the
code easier to follow.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/init-db.c | 31 ++++++++++++++++++-------------
1 file changed, 18 insertions(+), 13 deletions(-)
diff --git a/builtin/init-db.c b/builtin/init-db.c
index 01bc27904e..b4343c2804 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -229,24 +229,29 @@ int cmd_init_db(int argc,
if (!is_bare_repository_cfg) {
const char *git_dir_parent = strrchr(git_dir, '/');
- char *git_work_tree_cfg = NULL;
- if (git_dir_parent) {
- char *rel = xstrndup(git_dir, git_dir_parent - git_dir);
- git_work_tree_cfg = real_pathdup(rel, 1);
- free(rel);
- }
- if (!git_work_tree_cfg)
- git_work_tree_cfg = xgetcwd();
- if (work_tree)
+ if (work_tree) {
set_git_work_tree(the_repository, work_tree);
- else
- set_git_work_tree(the_repository, git_work_tree_cfg);
+ } else {
+ char *work_tree_cfg = NULL;
+
+ if (git_dir_parent) {
+ char *rel = xstrndup(git_dir, git_dir_parent - git_dir);
+ work_tree_cfg = real_pathdup(rel, 1);
+ free(rel);
+ }
+
+ if (!work_tree_cfg)
+ work_tree_cfg = xgetcwd();
+
+ set_git_work_tree(the_repository, work_tree_cfg);
+
+ free(work_tree_cfg);
+ }
+
if (access(repo_get_work_tree(the_repository), X_OK))
die_errno (_("Cannot access work tree '%s'"),
repo_get_work_tree(the_repository));
-
- free(git_work_tree_cfg);
}
else {
if (real_git_dir)
--
2.54.0.1189.g8c84645362.dirty
^ permalink raw reply related
* [PATCH 1/7] builtin/init: stop modifying global `git_work_tree_cfg` variable
From: Patrick Steinhardt @ 2026-06-10 6:56 UTC (permalink / raw)
To: git
In-Reply-To: <20260610-b4-pks-setup-drop-global-state-v1-0-5dff3eec8f06@pks.im>
When executing git-init(1) we need to figure out the final location of
the worktree. This location can be configured in a couple of ways: via
an environment variable, via the preexisting "core.worktree" config in
case we're reinitializing, or implicitly when reinitializing a non-bare
repository.
When checking for the worktree location in "builtin/init-db.c" we
populate any potentially-discovered value both by setting the global
`git_work_tree_cfg` variable and via `set_git_work_tree()`, which
ultimately ends up modifying `struct repository::worktree`.
Modifying `git_work_tree_cfg` is unnecessary though: we configure the
worktree in `create_default_files()`, and that function derives the
worktree location via `repo_get_work_tree()`. Consequently, propagating
the worktree via `set_git_work_tree()` is sufficient.
Stop munging `git_work_tree_cfg` and make it file-local to "setup.c" and
function-local to `cmd_init_db()`.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/init-db.c | 4 ++++
environment.c | 3 ---
environment.h | 1 -
setup.c | 3 +++
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/builtin/init-db.c b/builtin/init-db.c
index c55517ad94..01bc27904e 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -229,6 +229,8 @@ int cmd_init_db(int argc,
if (!is_bare_repository_cfg) {
const char *git_dir_parent = strrchr(git_dir, '/');
+ char *git_work_tree_cfg = NULL;
+
if (git_dir_parent) {
char *rel = xstrndup(git_dir, git_dir_parent - git_dir);
git_work_tree_cfg = real_pathdup(rel, 1);
@@ -243,6 +245,8 @@ int cmd_init_db(int argc,
if (access(repo_get_work_tree(the_repository), X_OK))
die_errno (_("Cannot access work tree '%s'"),
repo_get_work_tree(the_repository));
+
+ free(git_work_tree_cfg);
}
else {
if (real_git_dir)
diff --git a/environment.c b/environment.c
index fc3ed8bb1c..4e86335f25 100644
--- a/environment.c
+++ b/environment.c
@@ -100,9 +100,6 @@ int auto_comment_line_char;
bool warn_on_auto_comment_char;
#endif /* !WITH_BREAKING_CHANGES */
-/* This is set by setup_git_directory_gently() and/or git_default_config() */
-char *git_work_tree_cfg;
-
/*
* Repository-local GIT_* environment variables; see environment.h for details.
*/
diff --git a/environment.h b/environment.h
index ccfcf37bfb..5d6e4e6c1b 100644
--- a/environment.h
+++ b/environment.h
@@ -149,7 +149,6 @@ int have_git_dir(void);
extern int is_bare_repository_cfg;
int is_bare_repository(void);
-extern char *git_work_tree_cfg;
/* Environment bits from configuration mechanism */
extern int trust_executable_bit;
diff --git a/setup.c b/setup.c
index b4652651df..52228b42a1 100644
--- a/setup.c
+++ b/setup.c
@@ -31,6 +31,9 @@ enum allowed_bare_repo {
ALLOWED_BARE_REPO_ALL,
};
+/* This is set by setup_git_directory_gently() and/or git_default_config() */
+static char *git_work_tree_cfg;
+
static struct startup_info the_startup_info;
struct startup_info *startup_info = &the_startup_info;
const char *tmp_original_cwd;
--
2.54.0.1189.g8c84645362.dirty
^ permalink raw reply related
* [PATCH 0/7] setup: drop global state
From: Patrick Steinhardt @ 2026-06-10 6:56 UTC (permalink / raw)
To: git
Hi,
this patch series continues to refactor "setup.c", where the focus is to
drop remaining global state that we have in "setup.c". The most
important consequence of this is that we don't need to rely on
`the_repository` in `is_bare_repository()` anymore.
This series is built on top of 1ff279f340 (The 13th batch, 2026-06-09)
with ps/setup-centralize-odb-creation at 42b9d3dc9d (setup: construct
object database in `apply_repository_format()`, 2026-06-04) merged into
it.
Thanks!
Patrick
---
Patrick Steinhardt (7):
builtin/init: stop modifying global `git_work_tree_cfg` variable
builtin/init: simplify logic to configure worktree
setup: remove global `git_work_tree_cfg` variable
builtin/init: stop modifying `is_bare_repository_cfg`
environment: split up concerns of `is_bare_repository_cfg`
environment: stop using `the_repository` in `is_bare_repository()`
treewide: drop USE_THE_REPOSITORY_VARIABLE
attr.c | 4 ++--
builtin/bisect.c | 2 +-
builtin/blame.c | 2 +-
builtin/check-attr.c | 2 +-
builtin/fetch.c | 2 +-
builtin/gc.c | 2 +-
builtin/history.c | 2 +-
builtin/init-db.c | 44 +++++++++++++++++++++++++++-----------------
builtin/repack.c | 3 +--
builtin/repo.c | 2 +-
builtin/reset.c | 2 +-
builtin/rev-parse.c | 2 +-
environment.c | 10 +++-------
environment.h | 6 ++----
git.c | 2 +-
mailmap.c | 6 ++----
refs/files-backend.c | 2 +-
refs/reftable-backend.c | 4 +---
repository.c | 1 +
repository.h | 7 +++++++
setup.c | 45 ++++++++++++++++++++++++---------------------
setup.h | 6 ++++++
transport.c | 4 ++--
worktree.c | 4 ++--
24 files changed, 91 insertions(+), 75 deletions(-)
---
base-commit: f5a08a09a0fdf0fc2a355eba7979e2cfd65659e5
change-id: 20260422-b4-pks-setup-drop-global-state-6b1374aed5db
^ permalink raw reply
* Re: [PATCH v3 0/3] Documentation: recommend the use of b4
From: Patrick Steinhardt @ 2026-06-10 6:42 UTC (permalink / raw)
To: Toon Claes
Cc: git, Junio C Hamano, Tuomas Ahola, Weijie Yuan, Ramsay Jones,
SZEDER Gábor, Kristoffer Haugsbakk
In-Reply-To: <87a4t32a4g.fsf@emacs.iotcl.com>
On Tue, Jun 09, 2026 at 02:04:15PM +0200, Toon Claes wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> Now on the other hand, looking at a few examples I see GitGitGadget does
> deep nesting. Wouldn't it make sense to be consistent?
I've chatted with Dscho about this, and he mentioned that Stolee has
already opened [1]. So yes, if we agree to change the recommendation we
should also adapt GGG.
Patrick
[1]: https://github.com/gitgitgadget/gitgitgadget/issues/2254
^ permalink raw reply
* Re: [PATCH v2] ls-files: filter pathspec before lstat
From: Tamir Duberstein @ 2026-06-09 23:15 UTC (permalink / raw)
To: Jeff King; +Cc: git, René Scharfe, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <20260609104119.GA1509396@coredump.intra.peff.net>
On Tue, Jun 9, 2026 at 3:41 AM Jeff King <peff@peff.net> wrote:
>
> On Mon, Jun 08, 2026 at 07:37:15PM -0700, Tamir Duberstein wrote:
>
> > + /*
> > + * match_pathspec() is linear in pathspec.nr, so prefilter only
> > + * the single-pathspec case. Only entries shown by show_ce()
> > + * satisfy --error-unmatch.
> > + */
> > + if (pathspec.nr == 1 &&
> > + !match_pathspec(repo->index, &pathspec, fullname.buf,
> > + fullname.len, max_prefix_len, NULL,
> > + S_ISDIR(ce->ce_mode) ||
> > + S_ISGITLINK(ce->ce_mode)))
> > + continue;
>
> This feels...kind of arbitrary, no? Surely it's also faster with
> pathspec.nr == 2, and so on up to some nr closer to the size of the
> total index. It feels weird to be making an arbitrary cutoff based on
> pathspec performance in calling code like this.
>
> It is not wrong, per se, as you are optimizing your case without trying
> to hurt any others. But what do we do when somebody profiles it and
> comes along trying to bump the number to 2, or 10?
>
> I dunno.
Yeah, absolutely it's arbitrary. The simplest answer is that others
are welcome to bump this, provided they make the case for it.
^ permalink raw reply
* Re: [PATCH v2 0/2] unpack-trees: use explicit repository in trace2 calls
From: Junio C Hamano @ 2026-06-09 21:41 UTC (permalink / raw)
To: Jayesh Daga via GitGitGadget
Cc: git, Justin Tobler, Ayush Chandekar, Siddharth Asthana,
Jayesh Daga
In-Reply-To: <xmqqldf7y95a.fsf@gitster.g>
Junio C Hamano <gitster@pobox.com> writes:
> "Jayesh Daga via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> Jayesh Daga (2):
>> unpack-trees: use repository from index instead of global
>> unpack-trees: use repository from index instead of global
>
> That is unusual to have two commits with identical title and
> identical proposed log messages yet with different patch text.
>
> Do you perhaps want to squash them into a single commit?
After not hearing anything for full two months, I just decided to
squash these two patches into one, as the second one looked clearly
like "ooops, I screwed up and left one pice behind, so here is an
incremental update". I'll mark the result for 'next'.
^ permalink raw reply
* [PATCH] update-ref: add --rename option
From: Junio C Hamano @ 2026-06-09 21:35 UTC (permalink / raw)
To: git
Add a "--rename" option to "git update-ref" with the syntax:
$ git update-ref --rename <old-refname> <new-refname>
It renames <old-refname> together with its reflog to <new-refname>.
The command warns to use "git branch --rename" instead if old or new
refname falls inside refs/heads/. We issue this warning (but perform
the rename anyway) because "git update-ref" acts as a low-level
plumbing utility: unlike "git branch", it does not update active
worktree "HEAD" symbolic references or ".git/config" branch tracking
configuration.
Note that we do not add a corresponding "rename" verb to the "--stdin"
mode in this commit. While adding a "rename SP <oldref> SP <newref> LF"
command to the batch stream would be useful, operations in "--stdin"
mode are executed within an atomic "ref_transaction". Reference renaming
is currently implemented at the backend level via standalone, non-transactional
calls (e.g. "refs_rename_ref()"). Supporting renames in a batch transaction
would require extending "struct ref_transaction" and the reference storage
backend APIs to coordinate ref and reflog moves atomically, which is
left for a future refactoring.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
Documentation/git-update-ref.adoc | 9 ++++++++
builtin/update-ref.c | 36 +++++++++++++++++++++++++++++--
t/t1400-update-ref.sh | 29 +++++++++++++++++++++++++
3 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/Documentation/git-update-ref.adoc b/Documentation/git-update-ref.adoc
index 37a5019a8b..a622f2512b 100644
--- a/Documentation/git-update-ref.adoc
+++ b/Documentation/git-update-ref.adoc
@@ -9,6 +9,7 @@ SYNOPSIS
--------
[synopsis]
git update-ref [-m <reason>] [--no-deref] -d <ref> [<old-oid>]
+git update-ref [-m <reason>] [--no-deref] --rename <old-refname> <new-refname>
git update-ref [-m <reason>] [--no-deref] [--create-reflog] <ref> <new-oid> [<old-oid>]
git update-ref [-m <reason>] [--no-deref] --stdin [-z] [--batch-updates]
@@ -39,6 +40,14 @@ the result of following the symbolic pointers.
With `-d`, it deletes the named <ref> after verifying that it
still contains <old-oid>.
+With `--rename`, it renames <old-refname> together with its reflog to
+<new-refname>. If either <old-refname> or <new-refname> falls inside
+`refs/heads/`, a warning will be issued to use `git branch --rename`
+instead, because `git update-ref` does not update active worktree
+`HEAD` symbolic references or `.git/config` tracking settings. It
+fails if <old-refname> does not exist, or if <new-refname> already
+exists.
+
With `--stdin`, update-ref reads instructions from standard input and
performs all modifications together. Specify commands of the form:
diff --git a/builtin/update-ref.c b/builtin/update-ref.c
index 2d68c40ecb..1ae2dac6c5 100644
--- a/builtin/update-ref.c
+++ b/builtin/update-ref.c
@@ -15,6 +15,7 @@
static const char * const git_update_ref_usage[] = {
N_("git update-ref [<options>] -d <refname> [<old-oid>]"),
N_("git update-ref [<options>] <refname> <new-oid> [<old-oid>]"),
+ N_("git update-ref [<options>] --rename <old-refname> <new-refname>"),
N_("git update-ref [<options>] --stdin [-z] [--batch-updates]"),
NULL
};
@@ -756,13 +757,14 @@ int cmd_update_ref(int argc,
{
const char *refname, *oldval;
struct object_id oid, oldoid;
- int delete = 0, no_deref = 0, read_stdin = 0, end_null = 0;
+ int delete = 0, rename = 0, no_deref = 0, read_stdin = 0, end_null = 0;
int create_reflog = 0;
unsigned int flags = 0;
struct option options[] = {
OPT_STRING( 'm', NULL, &msg, N_("reason"), N_("reason of the update")),
OPT_BOOL('d', NULL, &delete, N_("delete the reference")),
+ OPT_BOOL( 0 , "rename", &rename, N_("rename the reference")),
OPT_BOOL( 0 , "no-deref", &no_deref,
N_("update <refname> not the one it points to")),
OPT_BOOL('z', NULL, &end_null, N_("stdin has NUL-terminated arguments")),
@@ -787,7 +789,7 @@ int cmd_update_ref(int argc,
}
if (read_stdin) {
- if (delete || argc > 0)
+ if (delete || rename || argc > 0)
usage_with_options(git_update_ref_usage, options);
if (end_null)
line_termination = '\0';
@@ -800,6 +802,36 @@ int cmd_update_ref(int argc,
if (end_null)
usage_with_options(git_update_ref_usage, options);
+ if (rename) {
+ const char *oldref, *newref;
+
+ if (delete || argc != 2)
+ usage_with_options(git_update_ref_usage, options);
+
+ oldref = argv[0];
+ newref = argv[1];
+
+ if (check_refname_format(oldref, 0))
+ die("invalid ref format: %s", oldref);
+ if (check_refname_format(newref, 0))
+ die("invalid ref format: %s", newref);
+
+ if (starts_with(oldref, "refs/heads/") ||
+ starts_with(newref, "refs/heads/"))
+ warning(_("You may want 'git branch --rename' instead?"));
+
+ if (!refs_ref_exists(get_main_ref_store(the_repository), oldref))
+ die("no ref named '%s'", oldref);
+
+ if (refs_ref_exists(get_main_ref_store(the_repository), newref))
+ die("ref '%s' already exists", newref);
+
+ if (refs_rename_ref(get_main_ref_store(the_repository),
+ oldref, newref, msg))
+ die("rename failed");
+ return 0;
+ }
+
if (delete) {
if (argc < 1 || argc > 2)
usage_with_options(git_update_ref_usage, options);
diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
index b2858a9061..fcb986d485 100755
--- a/t/t1400-update-ref.sh
+++ b/t/t1400-update-ref.sh
@@ -2455,4 +2455,33 @@ test_expect_success 'dangling symref overwritten without old oid' '
test_must_fail git rev-parse --verify refs/heads/does-not-exist
'
+test_expect_success '--rename fails if old-refname does not exist' '
+ test_must_fail git update-ref --rename refs/tags/no-such-ref refs/tags/new-ref 2>err &&
+ test_grep "no ref named .refs/tags/no-such-ref." err
+'
+
+test_expect_success '--rename fails if new-refname does exist' '
+ git update-ref refs/tags/existing HEAD &&
+ git update-ref refs/tags/old-ref HEAD &&
+ test_must_fail git update-ref --rename refs/tags/old-ref refs/tags/existing 2>err &&
+ test_grep "ref .refs/tags/existing. already exists" err
+'
+
+test_expect_success '--rename warns if old or new refname falls inside refs/heads/' '
+ git update-ref refs/heads/old-branch HEAD &&
+ git update-ref --rename refs/heads/old-branch refs/heads/new-branch 2>err &&
+ test_grep "branch --rename. instead" err &&
+ git rev-parse --verify refs/heads/new-branch &&
+ test_must_fail git rev-parse --verify refs/heads/old-branch
+'
+
+test_expect_success '--rename moves old-refname and its reflog to new-refname' '
+ git update-ref -m "initial tag" refs/tags/old-tag HEAD &&
+ git update-ref --rename refs/tags/old-tag refs/tags/new-tag 2>err &&
+ test_must_be_empty err &&
+ git rev-parse --verify refs/tags/new-tag &&
+ test_must_fail git rev-parse --verify refs/tags/old-tag &&
+ git log -g refs/tags/new-tag
+'
+
test_done
--
2.54.0-618-gdfae347457
^ permalink raw reply related
* [GSoC Blog] week 1 and 2: Complete and extend the remote-object-info command for git cat-file
From: Pablo Sabater @ 2026-06-09 21:31 UTC (permalink / raw)
To: git
Hi!
I've posted my first and second week, sorry I forgot to post the first
week here as well.
first week: https://pablosabater.dev/posts/gsoc-week-1-coding-period/
second week: https://pablosabater.dev/posts/gsoc-week-2-coding-period/
I hope you like it and I'm here for any doubt.
Thanks,
Pablo
^ permalink raw reply
* Re: [PATCH RFC v2 2/2] builtin/history: abort reword on same message
From: Justin Tobler @ 2026-06-09 20:14 UTC (permalink / raw)
To: Junio C Hamano
Cc: Phillip Wood, Pablo Sabater, git, cat, ps, kaartic.sivaraam,
ben.knoble
In-Reply-To: <xmqq5x3r1ph0.fsf@gitster.g>
On 26/06/09 12:30PM, Junio C Hamano wrote:
> Justin Tobler <jltobler@gmail.com> writes:
>
> > I can see a situation where a user performs:
> >
> > git history reword abcd1234
> >
> > with the intention to modify a commit message, but then for some reason
> > changes their mind and doesn't want history to change. Maybe the wrong
> > commit was referenced, or they decide the current message is actually
> > fine. From my understanding, there isn't a great way to abort rewording
> > a commit during editing and thus the user would have to reset history
> > afterwards if they care enough to go back to the previous point.
> >
> > So I do see some value in a mechanism to abort rewriting a commit
> > message.
>
> I think we are saying the same thing in different ways. I want to
> see that command "succeed" either case (normally we create a new
> commit object because we record an updated committer timestamp, but
> if there is no need to create a new commit object only to record an
> updated committer timestamp, we may choose not to and leave the
> history intact) and I do not want it to *abort*.
>
> The mechanism to do so may be exactly the same, i.e., accept an
> updated log message, then try to "hash-object" (without -w) the
> commit object with everything, except for the updated commit log
> message, taken from the original commit, plus the updated log
> message. And if the resulting hash is the same as the original, do
> not do anything further and return happily. Aborting sounds more
> like complaining loudly "baa, you asked me to reword but you gave me
> the same message? is anything wrong with you?" with non-zero exit
> status, which I think the user does not deserve in such a case.
Yes, I completely agree. If the user doesn't update the commit message,
for whatever reason, that should still be considered a success since it
follows the user's intent. I don't think it makes sense to exit with a
non-zero code in such cases. I would also question if we should print
any message/note to the user at all for the same reasons.
-Justin
^ permalink raw reply
* Re: [PATCH RFC v2 2/2] builtin/history: abort reword on same message
From: Junio C Hamano @ 2026-06-09 19:30 UTC (permalink / raw)
To: Justin Tobler
Cc: Phillip Wood, Pablo Sabater, git, cat, ps, kaartic.sivaraam,
ben.knoble
In-Reply-To: <aihH8ye-r4QuXlRD@denethor>
Justin Tobler <jltobler@gmail.com> writes:
> I can see a situation where a user performs:
>
> git history reword abcd1234
>
> with the intention to modify a commit message, but then for some reason
> changes their mind and doesn't want history to change. Maybe the wrong
> commit was referenced, or they decide the current message is actually
> fine. From my understanding, there isn't a great way to abort rewording
> a commit during editing and thus the user would have to reset history
> afterwards if they care enough to go back to the previous point.
>
> So I do see some value in a mechanism to abort rewriting a commit
> message.
I think we are saying the same thing in different ways. I want to
see that command "succeed" either case (normally we create a new
commit object because we record an updated committer timestamp, but
if there is no need to create a new commit object only to record an
updated committer timestamp, we may choose not to and leave the
history intact) and I do not want it to *abort*.
The mechanism to do so may be exactly the same, i.e., accept an
updated log message, then try to "hash-object" (without -w) the
commit object with everything, except for the updated commit log
message, taken from the original commit, plus the updated log
message. And if the resulting hash is the same as the original, do
not do anything further and return happily. Aborting sounds more
like complaining loudly "baa, you asked me to reword but you gave me
the same message? is anything wrong with you?" with non-zero exit
status, which I think the user does not deserve in such a case.
^ permalink raw reply
* [PATCH] commit-reach: remove get_reachable_subset()
From: Kristofer Karlsson via GitGitGadget @ 2026-06-09 19:28 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Kristofer Karlsson, Kristofer Karlsson
From: Kristofer Karlsson <krka@spotify.com>
get_reachable_subset() and tips_reachable_from_bases() answer the
same question: given a set of bases and a set of tips, which tips
are reachable from at least one base?
get_reachable_subset() was introduced in fcb2c0769d (2018-11-02)
for add_missing_tags() in remote.c. tips_reachable_from_bases()
was added in cbfe360b14 (2023-03-20) as part of the ahead-behind
series. The two were never consolidated.
With a commit-graph, tips_reachable_from_bases() can have an edge:
its DFS raises the generation floor as lower targets are found,
pruning more aggressively than the static floor in
get_reachable_subset(). Without generation numbers, some edge cases
may be slower with DFS instead of BFS since the date-ordered
prio_queue naturally stays near the top of the graph, but this
should not matter in practice -- worst case both visit the full
graph down from the bases.
The flag in remote.c changes from 1 (bit 0) to TMP_MARK (bit 4)
because tips_reachable_from_bases() uses SEEN (bit 0) internally.
TMP_MARK is already used for deduplication earlier in the same
function and is cleared before the reachability check.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
commit-reach: remove get_reachable_subset()
This removes get_reachable_subset() and converts its only caller to use
tips_reachable_from_bases() directly. Both answer the same category-2
reachability question ("which tips are reachable from these bases?") but
were introduced years apart and never consolidated.
On the no-commit-graph tradeoff: without generation numbers, the
date-ordered BFS in get_reachable_subset() can be more disciplined than
DFS since it naturally stays near the top of the graph. But this only
matters for repositories that are both large enough for the difference
to be measurable and missing a commit-graph -- a combination that would
already struggle for many other reasons. The fix there is to enable the
commit-graph, not to maintain two implementations of the same
reachability query.
Notes for reviewers:
* The flag in remote.c changes from 1 to TMP_MARK because
tips_reachable_from_bases() uses SEEN (bit 0) internally. TMP_MARK is
already used earlier in the same function and is cleared before the
reachability block.
* The sent_tips array is converted to a commit_list to match the
tips_reachable_from_bases() API. This is O(n) list-node allocations,
negligible compared to the graph walk.
* Test helper and test names rename from get_reachable_subset to
tips_reachable_from_bases to match the function being exercised.
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2144%2Fspkrka%2Fkrka%2Fremove-get-reachable-subset-v2-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2144/spkrka/krka/remove-get-reachable-subset-v2-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2144
commit-reach.c | 73 -------------------------------------------
commit-reach.h | 13 --------
remote.c | 19 ++++++-----
t/helper/test-reach.c | 39 +++++++++++------------
t/t6600-test-reach.sh | 18 +++++------
5 files changed, 36 insertions(+), 126 deletions(-)
diff --git a/commit-reach.c b/commit-reach.c
index 5df471a313..e78752eb87 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -1013,79 +1013,6 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
return result;
}
-struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from,
- struct commit **to, size_t nr_to,
- unsigned int reachable_flag)
-{
- struct commit **item;
- struct commit *current;
- struct commit_list *found_commits = NULL;
- struct commit **to_last = to + nr_to;
- struct commit **from_last = from + nr_from;
- timestamp_t min_generation = GENERATION_NUMBER_INFINITY;
- int num_to_find = 0;
-
- struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
-
- for (item = to; item < to_last; item++) {
- timestamp_t generation;
- struct commit *c = *item;
-
- repo_parse_commit(the_repository, c);
- generation = commit_graph_generation(c);
- if (generation < min_generation)
- min_generation = generation;
-
- if (!(c->object.flags & PARENT1)) {
- c->object.flags |= PARENT1;
- num_to_find++;
- }
- }
-
- for (item = from; item < from_last; item++) {
- struct commit *c = *item;
- if (!(c->object.flags & PARENT2)) {
- c->object.flags |= PARENT2;
- repo_parse_commit(the_repository, c);
-
- prio_queue_put(&queue, *item);
- }
- }
-
- while (num_to_find && (current = prio_queue_get(&queue)) != NULL) {
- struct commit_list *parents;
-
- if (current->object.flags & PARENT1) {
- current->object.flags &= ~PARENT1;
- current->object.flags |= reachable_flag;
- commit_list_insert(current, &found_commits);
- num_to_find--;
- }
-
- for (parents = current->parents; parents; parents = parents->next) {
- struct commit *p = parents->item;
-
- repo_parse_commit(the_repository, p);
-
- if (commit_graph_generation(p) < min_generation)
- continue;
-
- if (p->object.flags & PARENT2)
- continue;
-
- p->object.flags |= PARENT2;
- prio_queue_put(&queue, p);
- }
- }
-
- clear_prio_queue(&queue);
-
- clear_commit_marks_many(nr_to, to, PARENT1);
- clear_commit_marks_many(nr_from, from, PARENT2);
-
- return found_commits;
-}
-
define_commit_slab(bit_arrays, struct bitmap *);
static struct bit_arrays bit_arrays;
diff --git a/commit-reach.h b/commit-reach.h
index 3f3a563d8a..b3e7051738 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -96,19 +96,6 @@ int can_all_from_reach_with_flag(struct object_array *from,
int can_all_from_reach(struct commit_list *from, struct commit_list *to,
int commit_date_cutoff);
-
-/*
- * Return a list of commits containing the commits in the 'to' array
- * that are reachable from at least one commit in the 'from' array.
- * Also add the given 'flag' to each of the commits in the returned list.
- *
- * This method uses the PARENT1 and PARENT2 flags during its operation,
- * so be sure these flags are not set before calling the method.
- */
-struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from,
- struct commit **to, size_t nr_to,
- unsigned int reachable_flag);
-
struct ahead_behind_count {
/**
* As input, the *_index members indicate which positions in
diff --git a/remote.c b/remote.c
index f1a3681b7c..7cdb59ed87 100644
--- a/remote.c
+++ b/remote.c
@@ -1459,9 +1459,8 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds
* sent to the other side.
*/
if (sent_tips.nr) {
- const int reachable_flag = 1;
- struct commit_list *found_commits;
struct commit_stack src_commits = COMMIT_STACK_INIT;
+ struct commit_list *bases = NULL;
for_each_string_list_item(item, &src_tag) {
struct ref *ref = item->util;
@@ -1479,11 +1478,12 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds
commit_stack_push(&src_commits, commit);
}
- found_commits = get_reachable_subset(sent_tips.items,
- sent_tips.nr,
- src_commits.items,
- src_commits.nr,
- reachable_flag);
+ for (size_t i = 0; i < sent_tips.nr; i++)
+ commit_list_insert(sent_tips.items[i], &bases);
+ tips_reachable_from_bases(the_repository,
+ bases, src_commits.items,
+ src_commits.nr, TMP_MARK);
+ commit_list_free(bases);
for_each_string_list_item(item, &src_tag) {
struct ref *dst_ref;
@@ -1503,7 +1503,7 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds
* Is this tag, which they do not have, reachable from
* any of the commits we are sending?
*/
- if (!(commit->object.flags & reachable_flag))
+ if (!(commit->object.flags & TMP_MARK))
continue;
/* Add it in */
@@ -1513,9 +1513,8 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds
}
clear_commit_marks_many(src_commits.nr, src_commits.items,
- reachable_flag);
+ TMP_MARK);
commit_stack_clear(&src_commits);
- commit_list_free(found_commits);
}
string_list_clear(&src_tag, 0);
diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 5d86a96c17..eb44a64f50 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -7,6 +7,7 @@
#include "hex.h"
#include "object-name.h"
#include "ref-filter.h"
+#include "revision.h"
#include "setup.h"
#include "string-list.h"
#include "tag.h"
@@ -149,30 +150,26 @@ int cmd__reach(int ac, const char **av)
printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache));
clear_contains_cache(&cache);
- } else if (!strcmp(av[1], "get_reachable_subset")) {
- const int reachable_flag = 1;
- int count = 0;
- struct commit_list *current;
- struct commit_list *list = get_reachable_subset(X_stack.items, X_stack.nr,
- Y_stack.items, Y_stack.nr,
- reachable_flag);
- printf("get_reachable_subset(X,Y)\n");
- for (current = list; current; current = current->next) {
- if (!(list->item->object.flags & reachable_flag))
- die(_("commit %s is not marked reachable"),
- oid_to_hex(&list->item->object.oid));
- count++;
- }
+ } else if (!strcmp(av[1], "tips_reachable_from_bases")) {
+ struct commit_list *bases = NULL;
+ struct commit_list *result = NULL;
+
+ for (size_t i = 0; i < X_stack.nr; i++)
+ commit_list_insert(X_stack.items[i], &bases);
+ tips_reachable_from_bases(the_repository,
+ bases, Y_stack.items,
+ Y_stack.nr, TMP_MARK);
+ commit_list_free(bases);
+
+ printf("tips_reachable_from_bases(X,Y)\n");
for (size_t i = 0; i < Y_stack.nr; i++) {
- if (Y_stack.items[i]->object.flags & reachable_flag)
- count--;
+ if (Y_stack.items[i]->object.flags & TMP_MARK)
+ commit_list_insert(Y_stack.items[i], &result);
}
+ print_sorted_commit_ids(result);
- if (count < 0)
- die(_("too many commits marked reachable"));
-
- print_sorted_commit_ids(list);
- commit_list_free(list);
+ clear_commit_marks_many(Y_stack.nr, Y_stack.items, TMP_MARK);
+ commit_list_free(result);
}
object_array_clear(&X_obj);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index b5b314e570..51b140a539 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -391,7 +391,7 @@ test_expect_success 'rev-list: symmetric difference topo-order' '
run_all_modes git rev-list --topo-order commit-3-8...commit-6-6
'
-test_expect_success 'get_reachable_subset:all' '
+test_expect_success 'tips_reachable_from_bases:all' '
cat >input <<-\EOF &&
X:commit-9-1
X:commit-8-3
@@ -403,15 +403,15 @@ test_expect_success 'get_reachable_subset:all' '
Y:commit-5-6
EOF
(
- echo "get_reachable_subset(X,Y)" &&
+ echo "tips_reachable_from_bases(X,Y)" &&
git rev-parse commit-3-3 \
commit-1-7 \
commit-5-6 | sort
) >expect &&
- test_all_modes get_reachable_subset
+ test_all_modes tips_reachable_from_bases
'
-test_expect_success 'get_reachable_subset:some' '
+test_expect_success 'tips_reachable_from_bases:some' '
cat >input <<-\EOF &&
X:commit-9-1
X:commit-8-3
@@ -422,14 +422,14 @@ test_expect_success 'get_reachable_subset:some' '
Y:commit-5-6
EOF
(
- echo "get_reachable_subset(X,Y)" &&
+ echo "tips_reachable_from_bases(X,Y)" &&
git rev-parse commit-3-3 \
commit-1-7 | sort
) >expect &&
- test_all_modes get_reachable_subset
+ test_all_modes tips_reachable_from_bases
'
-test_expect_success 'get_reachable_subset:none' '
+test_expect_success 'tips_reachable_from_bases:none' '
cat >input <<-\EOF &&
X:commit-9-1
X:commit-8-3
@@ -439,8 +439,8 @@ test_expect_success 'get_reachable_subset:none' '
Y:commit-7-6
Y:commit-2-8
EOF
- echo "get_reachable_subset(X,Y)" >expect &&
- test_all_modes get_reachable_subset
+ echo "tips_reachable_from_bases(X,Y)" >expect &&
+ test_all_modes tips_reachable_from_bases
'
test_expect_success 'for-each-ref ahead-behind:linear' '
base-commit: 600fe743028cbfb640855f659e9851522214bc0b
--
gitgitgadget
^ permalink raw reply related
* Re: [PATCH RFC v2 2/2] builtin/history: abort reword on same message
From: Junio C Hamano @ 2026-06-09 19:17 UTC (permalink / raw)
To: Pablo Sabater; +Cc: Phillip Wood, git, cat, ps, kaartic.sivaraam, ben.knoble
In-Reply-To: <CAN5EUNRz9F+njb_O=Q4DzVMec-q+rDf83Ow+MPJE4yLCBq9qww@mail.gmail.com>
Pablo Sabater <pabloosabaterr@gmail.com> writes:
>> > I wonder if we should check that the committer identity is unchanged as
>> > well in case anyone is using this to fix commits after committing with
>> > the wrong identity.
>
> I think that if you reword a commit committed by someone else but end
> up with no changes I want it to be kept as it was.
That depends on the reason why the feature to "reword" the commit is
being used, and the use case Phillip is talking about is a bit
different.
A very common mistake a new user makes when starting a repository is
to make commits before they realize that they used a wrong identity
to create them. They are happy with what they committed, except
that they want these commits to be attributed to user.{name,email}
they corrected.
Also, people often use multiple identities (e.g., corp vs personal),
and when making commits to the project for their employer they do
not want to use their personal identity (and vice versa). After
making a mistake to create commits under wrong identity, they want
to fix these commits.
In such situations, there is no room for leaving the committer name
as "someone else". The user wants to get rid of the "someone else"s
identity out of these commits.
^ permalink raw reply
* Re: [PATCH GSoC RFC v12 12/12] cat-file: make remote-object-info allow-list dynamic
From: Junio C Hamano @ 2026-06-09 18:54 UTC (permalink / raw)
To: Chandra Pratap
Cc: Pablo Sabater, eric.peijian, calvinwan, chriscool, git, jltobler,
jonathantanmy, karthik.188, toon
In-Reply-To: <CA+J6zkQ22en2HgH03EedKOfC+jLcHH2UbwpH0h_bDEAHR6B2pg@mail.gmail.com>
Chandra Pratap <chandrapratap3519@gmail.com> writes:
> On Mon, 8 Jun 2026 at 15:45, Pablo Sabater <pabloosabaterr@gmail.com> wrote:
>>
>> The static allow-list in expand_atom() is hardcoded to only allow
>> "objectname" and "objectsize" for remote queries. This works because
>> ...
>> }
You just forced readers to skip over 200+ lines of quoted material,
looking for something interesting you have said in response to
comment on the patch in vain.
>> diff --git a/fetch-object-info.c b/fetch-object-info.c
>> index 51a898430d..425929a269 100644
>> --- a/fetch-object-info.c
>> +++ b/fetch-object-info.c
>> @@ -39,6 +39,12 @@ int fetch_object_info(const enum protocol_version version, struct object_info_ar
>> case protocol_v2:
>> if (!server_supports_v2("object-info"))
>> die(_("object-info capability is not enabled on the server"));
>> +
>> + for (int i = args->object_info_options->nr - 1; i >= 0; i--)
>
> Isn't args->object_info_options->nr of type size_t? We should probably
> do something
> like:
>
> for (size_t i = 0; i < args->args->object_info_options->nr; i++)
>
> instead.
This is a valid observation and a careful reading like this is very
much appreciated. It is unfortunate that it was buried by 200+
lines of irrelevant material before we find it.
Thanks.
>> + if (!server_supports_feature("object-info",
>> + args->object_info_options->items[i].string, 0))
>> + unsorted_string_list_delete_item(args->object_info_options, i, 0);
>> +
>> send_object_info_request(fd_out, args);
>> break;
>> case protocol_v1:
>>
>> --
>> 2.54.0
>
> Other than these, the patch series LGTM for now.
>
> Thanks,
> Chandra.
^ permalink raw reply
* Re: [PATCH RFC v2 2/2] builtin/history: abort reword on same message
From: Justin Tobler @ 2026-06-09 18:02 UTC (permalink / raw)
To: Junio C Hamano
Cc: Phillip Wood, Pablo Sabater, git, cat, ps, kaartic.sivaraam,
ben.knoble
In-Reply-To: <xmqq4ijbsn2m.fsf@gitster.g>
On 26/06/09 09:20AM, Junio C Hamano wrote:
> Phillip Wood <phillip.wood123@gmail.com> writes:
>
> > Hi Pablo
> >
> > On 09/06/2026 11:42, Pablo Sabater wrote:
> >> static int commit_tree_ext(struct repository *repo,
> >> @@ -135,6 +136,13 @@ static int commit_tree_ext(struct repository *repo,
> >> original_body, action, &commit_message);
> >> if (ret < 0)
> >> goto out;
> >> +
> >> + if (flags & COMMIT_TREE_ABORT_ON_SAME_MESSAGE &&
> >> + !strcmp(original_body, commit_message.buf)) {
> >> + fprintf(stderr, _("Message unchanged, aborting reword.\n"));
> >> + ret = 1;
> >> + goto out;
> >> + }
> >
> > I wonder if we should check that the committer identity is unchanged as
> > well in case anyone is using this to fix commits after committing with
> > the wrong identity.
> >
> > Aborting when the message and committer identity are unchanged seems
> > like a good idea.
>
> I am not sure why it would be a good idea. The user wanted to make
> the commit have this message, and the commit ended up having the
> same message as the user gave. That message may have been identical
> to what the commit originally had, or it may be different. Why is
> the former an abort-worthy event? A simple note, I may understand,
> but aborting with an error message?
I can see a situation where a user performs:
git history reword abcd1234
with the intention to modify a commit message, but then for some reason
changes their mind and doesn't want history to change. Maybe the wrong
commit was referenced, or they decide the current message is actually
fine. From my understanding, there isn't a great way to abort rewording
a commit during editing and thus the user would have to reset history
afterwards if they care enough to go back to the previous point.
So I do see some value in a mechanism to abort rewriting a commit
message. An unchanged commit message does seem like a reasonable signal
to essentially abort the reword. I'm not sure committer identity should
be taken into consideration though since it would inhibit a users
ability to abort the reword if they ever touch a commit that they
themselves are not the previous committer.
I don't think there is a need to have an error message though. Even in
the case where the user leaves the commit message unchanged and history
is left untouched, git-history(1) would be following exactly what the
user instructed it to do. I don't really see why the user should care
whether history was actually modified or not in such a scenario.
-Justin
^ permalink raw reply
* Re: [PATCH GSoC RFC v12 03/12] cat-file: add declaration of variable i inside its for loop
From: Pablo Sabater @ 2026-06-09 17:55 UTC (permalink / raw)
To: Junio C Hamano
Cc: eric.peijian, calvinwan, chriscool, git, jltobler, jonathantanmy,
karthik.188, toon, chandrapratap3519
In-Reply-To: <xmqqzf15w0cz.fsf@gitster.g>
El lun, 8 jun 2026 a las 16:52, Junio C Hamano (<gitster@pobox.com>) escribió:
>
> Pablo Sabater <pabloosabaterr@gmail.com> writes:
>
> > From: Eric Ju <eric.peijian@gmail.com>
> > Subject: Re: [PATCH GSoC RFC v12 03/12] cat-file: add declaration of variable i inside its for loop
>
> "add" sounds a bit strange, as the existing code wouldn't have
> compiled if the variable were never declared. What the patch did
> was to move (not add) the declaration of a function scope variable
> that is used to control for() loops. Would any of these work?
>
> Subject: [PATCH GSOC v12 03/12] cat-file: narrow scope of loop counter
> Subject: [PATCH GSOC v12 03/12] cat-file: declare loop counter inside for()
>
Hi!
True, it's better to write "move" and about the title, works as well,
I'll change them for the next version.
> > Some code used in this series declares variable i and only uses it
> > in a for loop, not in any other logic outside the loop.
> >
> > Change the declaration of i to be inside the for loop for readability.
> > While at it, we also change its type from "int" to "size_t" where the latter makes more sense.
>
> Curious single line that is overly long?
True, I'll wrap it up correctly in the next version.
>
> > Helped-by: Christian Couder <chriscool@tuxfamily.org>
> > Signed-off-by: Eric Ju <eric.peijian@gmail.com>
> > Signed-off-by: Pablo Sabater <pabloosabaterr@gmail.com>
> > ---
> > builtin/cat-file.c | 11 +++--------
> > fetch-pack.c | 3 +--
> > 2 files changed, 4 insertions(+), 10 deletions(-)
> >
> > diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> > index fa45f774d7..c060fd4800 100644
> > --- a/builtin/cat-file.c
> > +++ b/builtin/cat-file.c
> > @@ -726,12 +726,10 @@ static void dispatch_calls(struct batch_options *opt,
> > struct queued_cmd *cmd,
> > int nr)
> > {
> > - int i;
> > -
> > if (!opt->buffer_output)
> > die(_("flush is only for --buffer mode"));
> >
> > - for (i = 0; i < nr; i++)
> > + for (size_t i = 0; i < nr; i++)
> > cmd[i].fn(opt, cmd[i].line, output, data);
>
> The loop limit "nr" will not become as large as size_t because the
> caller passes a platform natural "int" to the function. Wouldn't a
> stupid compiler give us warning on comparing unsigned size_t with
> signed int here?
Yes, I'll change "i" to be int.
>
> > @@ -739,9 +737,7 @@ static void dispatch_calls(struct batch_options *opt,
> >
> > static void free_cmds(struct queued_cmd *cmd, size_t *nr)
> > {
> > - size_t i;
> > -
> > - for (i = 0; i < *nr; i++)
> > + for (size_t i = 0; i < *nr; i++)
> > FREE_AND_NULL(cmd[i].line);
>
> No type change, so the result is as safe as the original.
>
> > @@ -768,7 +764,6 @@ static void batch_objects_command(struct batch_options *opt,
> > size_t alloc = 0, nr = 0;
> >
> > while (strbuf_getdelim_strip_crlf(&input, stdin, opt->input_delim) != EOF) {
> > - int i;
> > const struct parse_cmd *cmd = NULL;
> > const char *p = NULL, *cmd_end;
> > struct queued_cmd call = {0};
> > @@ -778,7 +773,7 @@ static void batch_objects_command(struct batch_options *opt,
> > if (isspace(*input.buf))
> > die(_("whitespace before command: '%s'"), input.buf);
> >
> > - for (i = 0; i < ARRAY_SIZE(commands); i++) {
> > + for (size_t i = 0; i < ARRAY_SIZE(commands); i++) {
> > if (!skip_prefix(input.buf, commands[i].name, &cmd_end))
> > continue;
>
> ARRAY_SIZE() is some arithmetic over sizeof(*commands) and
> sizeof(commands), which is of type size_t, so this is better than
> the original. Use of size_t i of course is a natural way to index
> into commands[] array, so the result is just fine.
>
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index 120e01f3cf..f13951d154 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1388,9 +1388,8 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
> > if (advertise_sid && server_supports_v2("session-id"))
> > packet_buf_write(req_buf, "session-id=%s", trace2_session_id());
> > if (server_options && server_options->nr) {
> > - int i;
> > ensure_server_supports_v2("server-option");
> > - for (i = 0; i < server_options->nr; i++)
> > + for (size_t i = 0; i < server_options->nr; i++)
> > packet_buf_write(req_buf, "server-option=%s",
> > server_options->items[i].string);
>
> server_options is a string_list whose .nr member is of type size_t,
> so this comparison is perfectly fine. Ditto for ->items[i].string
> that is a natural way to index into an array.
>
> > }
>
> v
Thanks,
Pablo.
^ permalink raw reply
* Re: [PATCH GSoC RFC v12 04/12] t1006: split test utility functions into new "lib-cat-file.sh"
From: Pablo Sabater @ 2026-06-09 17:44 UTC (permalink / raw)
To: Chandra Pratap
Cc: eric.peijian, calvinwan, chriscool, git, jltobler, jonathantanmy,
karthik.188, toon
In-Reply-To: <CA+J6zkQe=K80QUOH8LwXCRw9nxv3tHBg+FtfDsYedY5xdHW79A@mail.gmail.com>
El mar, 9 jun 2026 a las 8:29, Chandra Pratap
(<chandrapratap3519@gmail.com>) escribió:
>
> On Mon, 8 Jun 2026 at 15:44, Pablo Sabater <pabloosabaterr@gmail.com> wrote:
> >
> > From: Eric Ju <eric.peijian@gmail.com>
> >
> > This refactor extracts utility functions from the cat-file's test
> > script "t1006-cat-file.sh" into a new "lib-cat-file.sh" dedicated
> > library file. The goal is to improve code reuse and readability,
> > enabling future tests to leverage these utilities without duplicating
> > code.
>
> Hmm, seems like a premature change to me. Do any of the subsequent
> commits require this refactor? Maybe the follow-up series that enables
> %objecttype support needs it? Did someone request this change in v11's
> feedback?
>
> If any of those are true, I think it's worthwhile mentioning it here. That will
> make it easier to determine whether this change is truly necessary.
Yes, these functions are needed for "t1017" which is created later in
the series [1] for the remote object info tests, so they are both used
in "t1006" (where they were originally) and "t1017".
>
> > Signed-off-by: Pablo Sabater <pabloosabaterr@gmail.com>
> > ---
> > t/lib-cat-file.sh | 16 ++++++++++++++++
> > t/t1006-cat-file.sh | 13 +------------
> > 2 files changed, 17 insertions(+), 12 deletions(-)
> >
> > diff --git a/t/lib-cat-file.sh b/t/lib-cat-file.sh
> > new file mode 100644
> > index 0000000000..44af232d74
> > --- /dev/null
> > +++ b/t/lib-cat-file.sh
> > @@ -0,0 +1,16 @@
> > +# Library of git-cat-file related test functions.
> > +
> > +# Print a string without a trailing newline.
> > +echo_without_newline () {
> > + printf '%s' "$*"
> > +}
> > +
> > +# Print a string without newlines and replace them with a NULL character (\0).
> > +echo_without_newline_nul () {
> > + echo_without_newline "$@" | tr '\n' '\0'
> > +}
> > +
> > +# Calculate the length of a string.
> > +strlen () {
> > + echo_without_newline "$1" | wc -c | sed -e 's/^ *//'
> > +}
> > diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
> > index 8e2c52652c..8360f3bbd9 100755
> > --- a/t/t1006-cat-file.sh
> > +++ b/t/t1006-cat-file.sh
> > @@ -4,6 +4,7 @@ test_description='git cat-file'
> >
> > . ./test-lib.sh
> > . "$TEST_DIRECTORY/lib-loose.sh"
> > +. "$TEST_DIRECTORY"/lib-cat-file.sh
> >
> > test_cmdmode_usage () {
> > test_expect_code 129 "$@" 2>err &&
> > @@ -99,18 +100,6 @@ do
> > '
> > done
> >
> > -echo_without_newline () {
> > - printf '%s' "$*"
> > -}
> > -
> > -echo_without_newline_nul () {
> > - echo_without_newline "$@" | tr '\n' '\0'
> > -}
> > -
> > -strlen () {
> > - echo_without_newline "$1" | wc -c | sed -e 's/^ *//'
> > -}
> > -
> > run_tests () {
> > type=$1
> > object_name="$2"
> >
> > --
> > 2.54.0
[1]: https://lore.kernel.org/git/20260608-ps-eric-work-rebase-v12-10-5338b766e658@gmail.com/
Thanks,
Pablo.
^ permalink raw reply
* Re: [PATCH GSoC RFC v12 12/12] cat-file: make remote-object-info allow-list dynamic
From: Pablo Sabater @ 2026-06-09 17:34 UTC (permalink / raw)
To: Chandra Pratap
Cc: eric.peijian, calvinwan, chriscool, git, jltobler, jonathantanmy,
karthik.188, toon
In-Reply-To: <CA+J6zkQ22en2HgH03EedKOfC+jLcHH2UbwpH0h_bDEAHR6B2pg@mail.gmail.com>
El mar, 9 jun 2026 a las 17:32, Chandra Pratap
(<chandrapratap3519@gmail.com>) escribió:
>
> On Mon, 8 Jun 2026 at 15:45, Pablo Sabater <pabloosabaterr@gmail.com> wrote:
> >
> > The static allow-list in expand_atom() is hardcoded to only allow
> > "objectname" and "objectsize" for remote queries. This works because
> > up to this point all servers will either support object-info with name
> > and size or they do not support them at all, but we cannot expect that
> > in a future different servers with different git versions to have the
> > same object-info capabilities. Therefore, the allow_list needs to be
> > dynamic depending on what does the server advertise.
> >
> > The client will now:
> >
> > 1. Request the protocol option that the placeholder refers to (i.e.
> > "size" when "%(objectsize)").
> >
> > 2. Filters the request in fetch_object_info() dropping any option that
> > the server does not advertise.
> >
> > 3. After the fetching, the options that haven't been dropped are the ones
> > fetched and supported by the server, these supported options are
> > mapped and remote_allowed_atoms is populated with the placeholders.
> >
> > 4. expand_atom() checks remote_allowed_atoms with the same behaviour as
> > the static allow_list had.
> >
> > Move object_info_options out of get_remote_info so the caller which has
> > data can select what options will be requested instead of requesting
> > always size.
> > Move batch_object_write() out so there will always be an output even if
> > all the placeholders are not supported by the server (returns an empty
> > line).
> >
> > Include "type" in the object_info_options so once the server supports
> > it, the clients know already how to request it.
> >
> > Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> > Mentored-by: Chandra Pratap <chandrapratap3519@gmail.com>
> > Signed-off-by: Pablo Sabater <pabloosabaterr@gmail.com>
> > ---
> > builtin/cat-file.c | 85 ++++++++++++++++++++++++++++++++---------------------
> > fetch-object-info.c | 6 ++++
> > 2 files changed, 58 insertions(+), 33 deletions(-)
> >
[snip]
> > diff --git a/fetch-object-info.c b/fetch-object-info.c
> > index 51a898430d..425929a269 100644
> > --- a/fetch-object-info.c
> > +++ b/fetch-object-info.c
> > @@ -39,6 +39,12 @@ int fetch_object_info(const enum protocol_version version, struct object_info_ar
> > case protocol_v2:
> > if (!server_supports_v2("object-info"))
> > die(_("object-info capability is not enabled on the server"));
> > +
> > + for (int i = args->object_info_options->nr - 1; i >= 0; i--)
>
> Isn't args->object_info_options->nr of type size_t? We should probably
> do something
> like:
>
> for (size_t i = 0; i < args->args->object_info_options->nr; i++)
>
> instead.
Hi!
void unsorted_string_list_delete_item(struct string_list *list, int i,
int free_util)
{
if (list->strdup_strings)
free(list->items[i].string);
if (free_util)
free(list->items[i].util);
list->items[i] = list->items[list->nr-1];
list->nr--;
}
I made it backwards because of "list->items[i] = list->items[list->nr
- 1];" If we made it from 0..nr and we delete the first element, for
the next iteration, the last element is at [0] but we are on [1] and
that swapped element never gets evaluated.
About size_t, yes, it is size_t but because we go backwards 0 - 1
would fail, also unsorted_string_list_delete_item() signature has "int
i". The options that can be on that list will be a small number so
there should be no problem, should I cast it explicitly?
>
> > + if (!server_supports_feature("object-info",
> > + args->object_info_options->items[i].string, 0))
> > + unsorted_string_list_delete_item(args->object_info_options, i, 0);
> > +
> > send_object_info_request(fd_out, args);
> > break;
> > case protocol_v1:
> >
> > --
> > 2.54.0
>
> Other than these, the patch series LGTM for now.
>
> Thanks,
> Chandra.
Thanks,
Pablo.
^ permalink raw reply
* Re: git-diff in a worktree is an order of magnitude slower?
From: D. Ben Knoble @ 2026-06-09 17:15 UTC (permalink / raw)
To: Jeff King; +Cc: Git
In-Reply-To: <20260609001134.GD358144@coredump.intra.peff.net>
On Mon, Jun 8, 2026 at 8:11 PM Jeff King <peff@peff.net> wrote:
>
> On Mon, Jun 08, 2026 at 07:36:45PM -0400, D. Ben Knoble wrote:
>
> > I'd like to report and offer to help fix what I view as a serious performance
> > bug:
> >
> > "git diff --no-ext-diff --quiet" performs about ~10x slower in a secondary
> > worktree than in the main worktree.
>
> Hmm, I get the opposite effect: it is much faster in the worktree!
>
> I did:
>
> git clone /path/to/linux.git
> git -C linux worktree add --detach ../wt
> hyperfine -L dir linux,wt 'git -C {dir} diff'
>
> which yielded:
>
> Benchmark 1: git -C linux diff
> Time (mean ± σ): 188.9 ms ± 2.5 ms [User: 166.4 ms, System: 130.7 ms]
> Range (min … max): 185.5 ms … 194.8 ms 16 runs
>
> Benchmark 2: git -C wt diff
> Time (mean ± σ): 20.0 ms ± 1.5 ms [User: 23.4 ms, System: 103.5 ms]
> Range (min … max): 17.2 ms … 24.6 ms 132 runs
>
> Summary
> git -C wt diff ran
> 9.43 ± 0.71 times faster than git -C linux diff
>
> Running:
>
> perf record -g git -C wt --no-pager diff
> perf record -g git -C linux --no-pager diff
> perf diff
>
> implies that the slow case is spending a lot more time computing sha1s.
> Which implies that the entries are stat dirty. And indeed, if I run:
>
> git -C linux update-index --refresh
>
> now they both take ~20ms.
Ah, TIL about --refresh. I suppose it could be nice if "git diff"
updated the index in this way, but that sounds like a band-aid. Maybe
creating a fresh worktree should do the equivalent to make sure it's
considered "fresh"?
(This also dropped my timings down to normal.)
At $DAYJOB, I _think_ some version of "git restore <stuff>" ended up
also updating the index.
> I wonder if it's just a racy-git problem? Many files are written in the
> same second as the index, so they end up with the same mtimes, and we
> have to err on the side of checking the contents.
>
> See Documentation/technical/racy-git.adoc for a larger discussion.
>
> So it is not really about worktrees at all, but just "bad luck" in
> generating that initial index (that goes away next time you actually
> make an index update that rewrites the whole thing).
Ah, that makes sense! I'm familiar with the raciness but didn't expect it here.
> I'd have thought USE_NSEC was the default these days, but looks like it
> isn't? Try building with that and I'll bet it goes away entirely.
Thanks, I'll take a look.
I can see on my Macbook that at least Meson does automatically set
either USE_ST_TIMESPEC or NO_NSEC automatically, but has no option to
enabled USE_NSEC and try that. I can probably write that patch (which
I'll do to test), and I can send it along with the "worktree add
should refresh the index" if you think that's an appropriate thing to
do.
> > PS I almost CC'd Peff and Patrick, whose names stood out in "git
> > shortlog builtin/{worktree,diff}* object-file* | sort -t\( -k2 -g",
> > but decided they'd be their own best judge of whether they can
> > understand what's going on? :)
>
> You might be interested in "git shortlog -ns". :)
>
> -Peff
Phew! Yeah, that's much nicer. Thanks! (When typing this out for
email, I even left out the "grep '^[^[:space:]]' |" filter :P)
--
D. Ben Knoble
^ permalink raw reply
* Re: [PATCH RFC v2 2/2] builtin/history: abort reword on same message
From: Pablo Sabater @ 2026-06-09 17:12 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Phillip Wood, git, cat, ps, kaartic.sivaraam, ben.knoble
In-Reply-To: <xmqq4ijbsn2m.fsf@gitster.g>
El mar, 9 jun 2026 a las 18:20, Junio C Hamano (<gitster@pobox.com>) escribió:
>
> Phillip Wood <phillip.wood123@gmail.com> writes:
>
> > Hi Pablo
> >
> > On 09/06/2026 11:42, Pablo Sabater wrote:
> >> static int commit_tree_ext(struct repository *repo,
> >> @@ -135,6 +136,13 @@ static int commit_tree_ext(struct repository *repo,
> >> original_body, action, &commit_message);
> >> if (ret < 0)
> >> goto out;
> >> +
> >> + if (flags & COMMIT_TREE_ABORT_ON_SAME_MESSAGE &&
> >> + !strcmp(original_body, commit_message.buf)) {
> >> + fprintf(stderr, _("Message unchanged, aborting reword.\n"));
> >> + ret = 1;
> >> + goto out;
> >> + }
> >
> > I wonder if we should check that the committer identity is unchanged as
> > well in case anyone is using this to fix commits after committing with
> > the wrong identity.
I think that if you reword a commit committed by someone else but end
up with no changes I want it to be kept as it was.
> >
> > Aborting when the message and committer identity are unchanged seems
> > like a good idea.
>
> I am not sure why it would be a good idea. The user wanted to make
> the commit have this message, and the commit ended up having the
> same message as the user gave. That message may have been identical
> to what the commit originally had, or it may be different. Why is
> the former an abort-worthy event? A simple note, I may understand,
> but aborting with an error message?
With what you said at [1], having this in an
"--avoid-unnecessary-rewrite" I think that the abort might be too much
as with the flag the user already expects this to happen and silent
might be better.
By the way, I feel that "--avoid-unnecessary-rewrite" is too long,
could it be something shorter? If not it could be set "-r" as the
short and leave the long as it is.
>
> Thanks.
[1]: https://lore.kernel.org/git/xmqqtsrbsvcm.fsf@gitster.g/
Thanks,
Pablo
^ permalink raw reply
* Re: [PATCH RFC v2 2/2] builtin/history: abort reword on same message
From: Junio C Hamano @ 2026-06-09 16:20 UTC (permalink / raw)
To: Phillip Wood; +Cc: Pablo Sabater, git, cat, ps, kaartic.sivaraam, ben.knoble
In-Reply-To: <54bd36e9-3d21-4f83-86d6-2882a14779de@gmail.com>
Phillip Wood <phillip.wood123@gmail.com> writes:
> Hi Pablo
>
> On 09/06/2026 11:42, Pablo Sabater wrote:
>> static int commit_tree_ext(struct repository *repo,
>> @@ -135,6 +136,13 @@ static int commit_tree_ext(struct repository *repo,
>> original_body, action, &commit_message);
>> if (ret < 0)
>> goto out;
>> +
>> + if (flags & COMMIT_TREE_ABORT_ON_SAME_MESSAGE &&
>> + !strcmp(original_body, commit_message.buf)) {
>> + fprintf(stderr, _("Message unchanged, aborting reword.\n"));
>> + ret = 1;
>> + goto out;
>> + }
>
> I wonder if we should check that the committer identity is unchanged as
> well in case anyone is using this to fix commits after committing with
> the wrong identity.
>
> Aborting when the message and committer identity are unchanged seems
> like a good idea.
I am not sure why it would be a good idea. The user wanted to make
the commit have this message, and the commit ended up having the
same message as the user gave. That message may have been identical
to what the commit originally had, or it may be different. Why is
the former an abort-worthy event? A simple note, I may understand,
but aborting with an error message?
Thanks.
^ permalink raw reply
* Re: [PATCH RFC 1/2] builtin/history: abort reword on unchanged message
From: Pablo Sabater @ 2026-06-09 15:51 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Patrick Steinhardt, Kaartic Sivaraam
In-Reply-To: <xmqqtsrbsvcm.fsf@gitster.g>
El mar, 9 jun 2026 a las 15:21, Junio C Hamano (<gitster@pobox.com>) escribió:
>
> Pablo Sabater <pabloosabaterr@gmail.com> writes:
>
> > True, after reading it, history being more costly or the in memory are
> > not good args.
>
> And no argument, including that history is new, is a good excuse to
> make these three things inconsistent, period.
>
> One of the patches in your updated iteration claims
>
> When using `git history reword <commit>` if the new message is the same
> as the original, it continues and rewrites the history when nothing
> changed.
>
> `git commit --amend` and `git rebase -i` with reword share this behavior
> and it is wrong as well, but changing them breaks what people are used
> to. Take the opportunity of `git history` being a new command and handle
> it correctly from the start.
>
> and I think this is a totally wrong attitude to go about this.
>
> I may have said that it may have been a better default to try hard
> to avoid making a change that is a no-op, other than that it changes
> committer timestamp, while making the current "always create a new
> commit object" behaviour optionally available, for these three
> commands, and cited that the behaviour of 'pick' in 'rebase -i' that
> avoids unnecessary rewrite as an example of a good practice.
>
> But I do not think the existing behaviour to always rewrite is
> *wrong* at all. It may be wrong not to offer the other choice of
> pretending no content change means no commit object change, but that
> is a different story.
>
> I also do not think *aborting* only when the message happens to be
> the same is a valid mode of operation at all.
>
> The most sensible first step, I think, is to add a new command line
> option to "git history" (which will gain more history editing
> subcommands) that tells the command to leave the original history
> as-is when the only change rewriting commits would make would be to
> the committer ident or timestamp information. If in a future a new
> replace-tree subcommand is added, e.g. if
>
> $ git history replace-tree HEAD~20 HEAD~27^{tree}
>
> were a command to rewrite the history in such a way that 20th direct
> ancestor of the current HEAD had a tree object HEAD~27^{tree}, by
> derfault the command _should_ rewrite HEAD~10 and everything that
> has it as an ancestor. With the "--avoid-unnecsssary-rewrite"
> optimization feature on, however, it may silently become a no-op
> when HEAD~27^{tree} happened to be the same tree as HEAD~20^{tree}
> so the only difference between rewritten and original HEAD~20 would
> be when that commit object was created and by whom.
>
> And give the same option to "rebase -i" or "commit --amend". We can
> discuss, educate the users, and flip the default at a major version
> boundary, if the "avoid unnecessary rewrite" truly turns out to be a
> better default (right now it is merely our speculation, and we do
> not even know if the current behaviour is a worse default).
Hi Junio,
Sorry about how I expressed myself. I didn't mean by wrong to be bad
or anything similar, I just noticed this when testing `git history
reword` and thought that I would like it this other way.
Saying that git history is new or I would like this to be different
are not good arguments to have `git history` inconsistent with other
commands.
My idea was more of a defensive thing, where you would need a
"--force-rewrite" opt to explicitly change timestamps. But I see the
point of having it in an `--avoid-unnecessary-rewrite` so without
options it has the same behavior as other commands.
I'll try to express myself better in the next version and go with the
opt direction.
Sorry again,
Pablo
^ permalink raw reply
* Re: [PATCH GSoC RFC v12 12/12] cat-file: make remote-object-info allow-list dynamic
From: Chandra Pratap @ 2026-06-09 15:32 UTC (permalink / raw)
To: Pablo Sabater
Cc: eric.peijian, calvinwan, chriscool, git, jltobler, jonathantanmy,
karthik.188, toon
In-Reply-To: <20260608-ps-eric-work-rebase-v12-12-5338b766e658@gmail.com>
On Mon, 8 Jun 2026 at 15:45, Pablo Sabater <pabloosabaterr@gmail.com> wrote:
>
> The static allow-list in expand_atom() is hardcoded to only allow
> "objectname" and "objectsize" for remote queries. This works because
> up to this point all servers will either support object-info with name
> and size or they do not support them at all, but we cannot expect that
> in a future different servers with different git versions to have the
> same object-info capabilities. Therefore, the allow_list needs to be
> dynamic depending on what does the server advertise.
>
> The client will now:
>
> 1. Request the protocol option that the placeholder refers to (i.e.
> "size" when "%(objectsize)").
>
> 2. Filters the request in fetch_object_info() dropping any option that
> the server does not advertise.
>
> 3. After the fetching, the options that haven't been dropped are the ones
> fetched and supported by the server, these supported options are
> mapped and remote_allowed_atoms is populated with the placeholders.
>
> 4. expand_atom() checks remote_allowed_atoms with the same behaviour as
> the static allow_list had.
>
> Move object_info_options out of get_remote_info so the caller which has
> data can select what options will be requested instead of requesting
> always size.
> Move batch_object_write() out so there will always be an output even if
> all the placeholders are not supported by the server (returns an empty
> line).
>
> Include "type" in the object_info_options so once the server supports
> it, the clients know already how to request it.
>
> Mentored-by: Karthik Nayak <karthik.188@gmail.com>
> Mentored-by: Chandra Pratap <chandrapratap3519@gmail.com>
> Signed-off-by: Pablo Sabater <pabloosabaterr@gmail.com>
> ---
> builtin/cat-file.c | 85 ++++++++++++++++++++++++++++++++---------------------
> fetch-object-info.c | 6 ++++
> 2 files changed, 58 insertions(+), 33 deletions(-)
>
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index 1166a046b4..055991b5af 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -341,13 +341,10 @@ struct expand_data {
> * Flags about when an object info is being fetched from remote.
> */
> unsigned is_remote:1;
> -};
> -#define EXPAND_DATA_INIT { .mode = S_IFINVALID, .type = OBJ_BAD }
>
> -static const char *remote_object_info_atoms[] = {
> - "objectname",
> - "objectsize",
> + struct string_list remote_allowed_atoms;
> };
> +#define EXPAND_DATA_INIT { .mode = S_IFINVALID, .type = OBJ_BAD, .remote_allowed_atoms = STRING_LIST_INIT_NODUP }
>
> static int is_atom(const char *atom, const char *s, int slen)
> {
> @@ -359,17 +356,11 @@ static int expand_atom(struct strbuf *sb, const char *atom, int len,
> struct expand_data *data)
> {
> if (data->is_remote) {
> - size_t i, allowed_nr = ARRAY_SIZE(remote_object_info_atoms);
> - for (i = 0; i < allowed_nr; i++)
> - if (is_atom(remote_object_info_atoms[i], atom, len))
> + size_t i;
> + for (i = 0; i < data->remote_allowed_atoms.nr; i++)
> + if (is_atom(data->remote_allowed_atoms.items[i].string, atom, len))
> break;
> -
> - /*
> - * On remote, skip unsupported atoms returning an empty sb,
> - * honoring how for-each-ref handles known but inapplicable
> - * atoms (e.g. %(tagger)).
> - */
> - if (i == allowed_nr)
> + if (i == data->remote_allowed_atoms.nr)
> return 1;
> }
>
> @@ -686,12 +677,12 @@ static int get_remote_info(struct batch_options *opt,
> int argc,
> const char **argv,
> struct object_info **remote_object_info,
> - struct oid_array *object_info_oids)
> + struct oid_array *object_info_oids,
> + struct string_list *object_info_options)
> {
> int retval = 0;
> struct remote *remote = NULL;
> struct object_id oid;
> - struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> static struct transport *gtransport;
>
> /*
> @@ -726,15 +717,12 @@ static int get_remote_info(struct batch_options *opt,
> gtransport->smart_options->object_info = 1;
> gtransport->smart_options->object_info_oids = object_info_oids;
>
> - string_list_append(&object_info_options, "size");
> -
> - if (object_info_options.nr > 0) {
> - gtransport->smart_options->object_info_options = &object_info_options;
> + if (object_info_options->nr > 0) {
> + gtransport->smart_options->object_info_options = object_info_options;
> gtransport->smart_options->object_info_data = *remote_object_info;
> retval = transport_fetch_refs(gtransport, NULL);
> }
> cleanup:
> - string_list_clear(&object_info_options, 0);
> transport_disconnect(gtransport);
> return retval;
> }
> @@ -820,6 +808,21 @@ static void parse_cmd_mailmap(struct batch_options *opt UNUSED,
> load_mailmap();
> }
>
> +struct protocol_placeholder_entry {
> + const char *option;
> + const char *atom;
> +};
> +
> +static const struct protocol_placeholder_entry remote_atom_map[] = {
> + {"size", "objectsize"},
> + {"type", "objecttype"},
> + /*
> + * Add new protocol options here. Even if the server doesn't support
> + * them the allow_list will drop them if the server doesn't advertise
> + * them.
> + */
> +};
> +
> static void parse_cmd_remote_object_info(struct batch_options *opt,
> const char *line, struct strbuf *output,
> struct expand_data *data)
> @@ -829,6 +832,7 @@ static void parse_cmd_remote_object_info(struct batch_options *opt,
> char *line_to_split;
> static struct object_info *remote_object_info;
> static struct oid_array object_info_oids = OID_ARRAY_INIT;
> + struct string_list object_info_options = STRING_LIST_INIT_NODUP;
>
> if (strlen(line) >= MAX_REMOTE_OBJ_INFO_LINE)
> die(_("remote-object-info command too long"));
> @@ -841,30 +845,44 @@ static void parse_cmd_remote_object_info(struct batch_options *opt,
> die(_("remote-object-info supports at most %d objects"),
> MAX_ALLOWED_OBJ_LIMIT);
>
> + if (data->info.sizep)
> + string_list_append(&object_info_options, "size");
> + if (data->info.typep)
> + string_list_append(&object_info_options, "type");
> +
> if (get_remote_info(opt, count, argv, &remote_object_info,
> - &object_info_oids))
> + &object_info_oids, &object_info_options))
> goto cleanup;
>
> + string_list_clear(&data->remote_allowed_atoms, 0);
> + string_list_append(&data->remote_allowed_atoms, "objectname");
> + for (size_t i = 0; i < ARRAY_SIZE(remote_atom_map); i++)
> + if (unsorted_string_list_has_string(&object_info_options, remote_atom_map[i].option))
> + string_list_append(&data->remote_allowed_atoms,
> + remote_atom_map[i].atom);
> +
> data->skip_object_info = 1;
> for (size_t i = 0; i < object_info_oids.nr; i++) {
> data->oid = object_info_oids.oid[i];
> - if (remote_object_info[i].sizep) {
> - /*
> - * When reaching here, it means remote-object-info can retrieve
> - * information from server without downloading them.
> - */
> + /*
> + * When reaching here, it means remote-object-info can retrieve
> + * information from server without downloading them.
> + */
> + if (remote_object_info[i].sizep)
> data->size = *remote_object_info[i].sizep;
> - opt->batch_mode = BATCH_MODE_INFO;
> - data->is_remote = 1;
> - batch_object_write(argv[i + 1], output, opt, data, NULL, 0);
> - data->is_remote = 0;
> - }
> + if (remote_object_info[i].typep)
> + data->type = *remote_object_info[i].typep;
> + opt->batch_mode = BATCH_MODE_INFO;
> + data->is_remote = 1;
> + batch_object_write(argv[i + 1], output, opt, data, NULL, 0);
> + data->is_remote = 0;
> }
> data->skip_object_info = 0;
>
> cleanup:
> for (size_t i = 0; i < object_info_oids.nr; i++)
> free_object_info_contents(&remote_object_info[i]);
> + string_list_clear(&object_info_options, 0);
> free(line_to_split);
> free(argv);
> free(remote_object_info);
> @@ -1177,6 +1195,7 @@ static int batch_objects(struct batch_options *opt)
> cleanup:
> strbuf_release(&input);
> strbuf_release(&output);
> + string_list_clear(&data.remote_allowed_atoms, 0);
> warn_on_object_refname_ambiguity = save_warning;
> return retval;
> }
> diff --git a/fetch-object-info.c b/fetch-object-info.c
> index 51a898430d..425929a269 100644
> --- a/fetch-object-info.c
> +++ b/fetch-object-info.c
> @@ -39,6 +39,12 @@ int fetch_object_info(const enum protocol_version version, struct object_info_ar
> case protocol_v2:
> if (!server_supports_v2("object-info"))
> die(_("object-info capability is not enabled on the server"));
> +
> + for (int i = args->object_info_options->nr - 1; i >= 0; i--)
Isn't args->object_info_options->nr of type size_t? We should probably
do something
like:
for (size_t i = 0; i < args->args->object_info_options->nr; i++)
instead.
> + if (!server_supports_feature("object-info",
> + args->object_info_options->items[i].string, 0))
> + unsorted_string_list_delete_item(args->object_info_options, i, 0);
> +
> send_object_info_request(fd_out, args);
> break;
> case protocol_v1:
>
> --
> 2.54.0
Other than these, the patch series LGTM for now.
Thanks,
Chandra.
^ permalink raw reply
* Re: [PATCH v2] describe: limit default ref iteration to tags
From: Tamir Duberstein @ 2026-06-09 14:44 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Patrick Steinhardt
In-Reply-To: <20260609110957.GB1509396@coredump.intra.peff.net>
On Tue, Jun 9, 2026 at 4:09 AM Jeff King <peff@peff.net> wrote:
>
> On Mon, Jun 08, 2026 at 07:32:14PM -0700, Tamir Duberstein wrote:
>
> > The benchmark checkout had 120,532 refs, of which 330 were tags. With
> > `$repo` naming the checkout, `$commit` an exactly tagged commit, and
> > `$parent` and `$this` the two binaries, I ran:
> >
> > hyperfine --warmup 3 --runs 15 \
> > --command-name parent \
> > '$parent -C $repo describe --exact-match $commit' \
> > --command-name 'this commit' \
> > '$this -C $repo describe --exact-match $commit'
> >
> > The results were:
> >
> > Benchmark 1: parent
> > Time (mean ± σ): 171.7 ms ± 18.5 ms [User: 23.9 ms, System: 133.6 ms]
> > Range (min … max): 142.3 ms … 198.3 ms 15 runs
> >
> > Benchmark 2: this commit
> > Time (mean ± σ): 9.9 ms ± 1.1 ms [User: 3.3 ms, System: 4.7 ms]
> > Range (min … max): 8.8 ms … 13.1 ms 15 runs
> >
> > Summary
> > this commit ran
> > 17.35 ± 2.63 times faster than parent
> >
> > Both revisions were built with -O3, -mcpu=native, and ThinLTO using
> > Apple clang 21.0.0 on macOS 26.5. The machine was a MacBook Pro
> > (Mac16,6) with a 16-core Apple M4 Max (12 performance and four
> > efficiency cores) and 128 GB RAM.
>
> This patch looks fine to me, but let me pick a nit for a minute, because
> I think there is a broader conversation to be had.
Just to say from the start: I appreciate you taking the time to discuss this.
>
> Given the discussion in earlier rounds and sibling topics, I assume the
> commit message here was AI-generated. And it's OK in the sense that it
> is describing what happened and I assume is entirely accurate. But as a
> human reader, it feels so much more verbose than what I'd expect, as it
> is full of semi-irrelevant details. Why set --warmup and --runs? Why
> bother with --command-name, which just means you have to show the
> commands separately anyway? Is the amount of RAM in the machine
> important for this test? Surely it could be if it was absurdly tiny, but
> in general, no, I would not expect it to be.
Well, the details matter in case some human reader knows something I
don't, or wants to reproduce the findings and observes something
completely different - they aught to be able to reconstruct my
environment.
The command-name flag is needed; without it the output of the
hyperfine would include local paths and would require post-processing
to include in the message.
>
> So while it is perhaps reasonable to document every detail in case
> somebody later wants to verify or reproduce timings, it is a little
> overwhelming when trying to tell a story, the core of which is:
>
> In a repo with ~120k refs, ~300 of which were tags, running:
>
> git describe --exact-match $some_tag
>
> went from ~170ms to ~10ms, since we no longer needed to iterate all of
> those other refs.
>
> That has _way_ less detail, but makes the point succinctly.
I don't disagree. Ultimately, it is a matter of maintainer preference,
and I'm happy to follow (and instruct the AI to follow) the
preferences described in this thread.
>
> I dunno. I am not trying to pick apart your commit in particular, but am
> more interested in the broader use of AI commit messages going forward.
> This kind of verbosity is quite common in the output (from my limited
> experience), and I think creates more work for reviewers. Should we be
> expecting contributors to make things more concise before submitting
> (either manually or through prompting)? Or do people even agree that the
> shorter version is preferable? I could be the only one.
The AI does what you tell it; in this case I was telling it to follow
the precedent in the repo and to ensure its claims are always cited.
I'll tune it for succinct output going forward.
>
> I have a few other comments on the patch itself below.
>
> > diff --git a/builtin/describe.c b/builtin/describe.c
> > index 1c47d7c0b7..3532c8ff22 100644
> > --- a/builtin/describe.c
> > +++ b/builtin/describe.c
> > @@ -740,6 +740,9 @@ int cmd_describe(int argc,
> > return ret;
> > }
> >
> > + if (!all)
> > + for_each_ref_opts.prefix = "refs/tags/";
> > +
> > hashmap_init(&names, commit_name_neq, NULL, 0);
> > refs_for_each_ref_ext(get_main_ref_store(the_repository),
> > get_name, NULL, &for_each_ref_opts);
>
> The code change looks fine. It creates a bit of a subtle dependency
> between what's happening here, and the filtering inside get_name(). But
> I think that's OK for the scope of a single command. It _might_ be
> possible to simplify the top of get_name(), since we'd no longer see
> non-tag refs in the input. But it also may not, since we have to strip
> out the prefix anyway. It can certainly come on top as a cleanup later
> if we want.
>
> > diff --git a/t/perf/p6100-describe.sh b/t/perf/p6100-describe.sh
>
> It is a little curious that we add a perf test here, but the commit
> message does not even show it off. ;)
>
> I ran it myself here and had trouble showing improvement, simply because
> it is already quite fast! I guess that's because I'm on Linux, where
> warm-cache filesystem operations are pretty fast. Bumping $ref_count by
> a factor of 10 made the "before" case 30ms, and after is still sub-1ms.
>
> > +test_expect_success 'set up many unrelated refs' '
> > + ref_count=10000 &&
> > + git tag -m tip tip HEAD &&
> > + for i in $(test_seq $ref_count)
> > + do
> > + printf "create refs/heads/describe-perf/%05d HEAD\n" $i ||
> > + return 1
> > + done >instructions &&
> > + git update-ref --stdin <instructions
> > +'
>
> A few things come to mind on reading this.
>
> I have mixed feelings on sticking synthetic constructions in the t/perf
> suite. Part of the original point was that we'd run it against real
> repos to see how they perform. But that implies that people running it
> have some clue about which tests may be interesting on which repos,
> which is hopeful at best. So we've turned to this kind of synthetic
> construction at times (and this is certainly not the first). It's
> probably a reasonable tactic here.
>
> I suspect the resulting state is not all that realistic, though. If you
> have 10,000 refs, you probably didn't make them all at once. And so in
> practice the majority of them would be packed. Sticking "git pack-refs
> --all" at the end might give more realistic numbers.
>
> Bumping to a larger number of refs shows the effect more clearly, but at
> the cost of making the setup take a long time (since we have to take a
> lockfile on each ref!). We could sneak around it by generating a
> packed-refs file directly, but now the test really would be
> backend-specific. Probably better not to go there.
>
> And finally, the loop can be written a bit more succinctly these days
> as:
>
> diff --git a/t/perf/p6100-describe.sh b/t/perf/p6100-describe.sh
> index ed9f1abe18..b365dc67ee 100755
> --- a/t/perf/p6100-describe.sh
> +++ b/t/perf/p6100-describe.sh
> @@ -30,12 +30,8 @@ test_perf 'describe HEAD with one tag' '
> test_expect_success 'set up many unrelated refs' '
> ref_count=10000 &&
> git tag -m tip tip HEAD &&
> - for i in $(test_seq $ref_count)
> - do
> - printf "create refs/heads/describe-perf/%05d HEAD\n" $i ||
> - return 1
> - done >instructions &&
> - git update-ref --stdin <instructions
> + test_seq -f "create refs/heads/describe-perf/%05d HEAD" $ref_count |
> + git update-ref --stdin
> '
>
> test_perf 'describe exact tag with many unrelated refs' '
>
>
> Probably not worth re-rolling on its own, though.
The suggested changes seem reasonable to me. Certainly I am happy to
make them, and re-rolls are cheap. Do let me know explicitly if you'd
like that done.
>
> -Peff
Thanks for your time! I really appreciate it.
^ permalink raw reply
* Re: [GSoC PATCH v2 3/4] repo: add path.gitdir with absolute and relative suffix formatting
From: Justin Tobler @ 2026-06-09 14:31 UTC (permalink / raw)
To: K Jayatheerth
Cc: git, a3205153416, gitster, kumarayushjha123, lucasseikioshiro,
phillip.wood, sandals
In-Reply-To: <CA+rGoLdpkuigWXqNSk3bS7-uhtzCizkPx2GGtNaTyy5J1SF7Rg@mail.gmail.com>
On 26/06/09 10:11AM, K Jayatheerth wrote:
> > > + struct strbuf sb = STRBUF_INIT;
> > > + enum path_format fmt = (arg_path_format != -1) ? arg_path_format : def_format;
> >
> > hmmm, so `arg_path_format` specifies what the user-provided format and
> > acts as a sentinel to signal there is no value provided and the fallback
> > format needs to be used. This feels a tad bit awkward to me.
> >
> > I wonder if we should introduce a PATH_FORMAT_DEFAULT to the
> > `path_format` enum that maps to one of the existing enum values in
> > `path.c:format_path()`. Here in `print_path()`, we could then intercept
> > a PATH_FORMAT_DEFAULT value and override it to the specified
> > `def_format`. I'm not sure if this is ultimately that much better
> > though.
>
> You're right that the -1 is awkward
> it forces arg_path_format to be an int rather than the enum type
> itself, which loses type safety.
>
> PATH_FORMAT_DEFAULT is cleaner in that regard, but it pushes the "what
> does default mean?" question into format_path()
> which currently has no notion of a fallback.
> Since the fallback is call-site specific (each path type in rev-parse
> has its own default),
> I'd rather keep that logic in print_path() where the context lives.
>
> A middle ground would be adding PATH_FORMAT_DEFAULT to the enum but
> not handling it in format_path().
>
> ---
> enum path_format_type format = PATH_FORMAT_DEFAULT;
>
> /* ... */
>
> static void print_path(const char *path, const char *prefix,
> enum path_format_type format,
> enum path_format_type def_format)
> {
> struct strbuf sb = STRBUF_INIT;
> enum path_format_type fmt =
> (format == PATH_FORMAT_DEFAULT) ? def_format : format;
>
> format_path(&sb, path, prefix, fmt);
> puts(sb.buf);
> strbuf_release(&sb);
> }
> ---
Intercepting PATH_FORMAT_DEFAULT in print_path() and overriding it to
the appropriate default needed for the specific path printed by
git-rev-parse(1), as shown above, seems reasonable to me.
But I do think that PATH_FORMAT_DEFAULT should have an actual default in
format_path(). Otherwise we would have an enum value that requires
callers to explicitly handle prior to invoking format_path() which would
also be rather awkward. IMO, it probably wouldn't be a big deal to just
say PATH_FORMAT_DEFAULT is treated as PATH_FORMAT_UNMODIFIED when passed
to format_path() and document it. In practice, our rev-parse use-case
would always replace PATH_FORMAT_DEFAULT with the appropriate value
prior to invoking format_path().
-Justin
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox