Git development
 help / color / mirror / Atom feed
* [PATCH v4 4/4] archive: support remote archive from stateless transport
From: Jiang Xin @ 2023-12-14 14:13 UTC (permalink / raw)
  To: Git List, Junio C Hamano; +Cc: Jiang Xin, Eric Sunshine
In-Reply-To: <cover.1702562879.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Even though we can establish a stateless connection, we still cannot
archive the remote repository using a stateless HTTP protocol. Try the
following steps to make it work.

 1. Add support for "git-upload-archive" service in "http-backend".

 2. Use the URL ".../info/refs?service=git-upload-pack" to detect the
    protocol version, instead of use the "git-upload-archive" service.

 3. "git-archive" does not expect to see protocol version and
    capabilities when connecting to remote-helper, so do not send them
    in "remote-curl.c" for the "git-upload-archive" service.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 http-backend.c         | 15 +++++++++++----
 remote-curl.c          | 14 +++++++++++---
 t/t5003-archive-zip.sh | 34 ++++++++++++++++++++++++++++++++++
 transport-helper.c     |  3 ++-
 4 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/http-backend.c b/http-backend.c
index ff07b87e64..6a2c919839 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -38,6 +38,7 @@ struct rpc_service {
 static struct rpc_service rpc_service[] = {
 	{ "upload-pack", "uploadpack", 1, 1 },
 	{ "receive-pack", "receivepack", 0, -1 },
+	{ "upload-archive", "uploadarchive", 0, -1 },
 };
 
 static struct string_list *get_parameters(void)
@@ -639,10 +640,15 @@ static void check_content_type(struct strbuf *hdr, const char *accepted_type)
 
 static void service_rpc(struct strbuf *hdr, char *service_name)
 {
-	const char *argv[] = {NULL, "--stateless-rpc", ".", NULL};
+	struct strvec argv = STRVEC_INIT;
 	struct rpc_service *svc = select_service(hdr, service_name);
 	struct strbuf buf = STRBUF_INIT;
 
+	strvec_push(&argv, svc->name);
+	if (strcmp(service_name, "git-upload-archive"))
+		strvec_push(&argv, "--stateless-rpc");
+	strvec_push(&argv, ".");
+
 	strbuf_reset(&buf);
 	strbuf_addf(&buf, "application/x-git-%s-request", svc->name);
 	check_content_type(hdr, buf.buf);
@@ -655,9 +661,9 @@ static void service_rpc(struct strbuf *hdr, char *service_name)
 
 	end_headers(hdr);
 
-	argv[0] = svc->name;
-	run_service(argv, svc->buffer_input);
+	run_service(argv.v, svc->buffer_input);
 	strbuf_release(&buf);
+	strvec_clear(&argv);
 }
 
 static int dead;
@@ -723,7 +729,8 @@ static struct service_cmd {
 	{"GET", "/objects/pack/pack-[0-9a-f]{64}\\.idx$", get_idx_file},
 
 	{"POST", "/git-upload-pack$", service_rpc},
-	{"POST", "/git-receive-pack$", service_rpc}
+	{"POST", "/git-receive-pack$", service_rpc},
+	{"POST", "/git-upload-archive$", service_rpc}
 };
 
 static int bad_request(struct strbuf *hdr, const struct service_cmd *c)
diff --git a/remote-curl.c b/remote-curl.c
index ef05752ca5..ce6cb8ac05 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -1447,8 +1447,14 @@ static int stateless_connect(const char *service_name)
 	 * establish a stateless connection, otherwise we need to tell the
 	 * client to fallback to using other transport helper functions to
 	 * complete their request.
+	 *
+	 * The "git-upload-archive" service is a read-only operation. Fallback
+	 * to use "git-upload-pack" service to discover protocol version.
 	 */
-	discover = discover_refs(service_name, 0);
+	if (!strcmp(service_name, "git-upload-archive"))
+		discover = discover_refs("git-upload-pack", 0);
+	else
+		discover = discover_refs(service_name, 0);
 	if (discover->version != protocol_v2) {
 		printf("fallback\n");
 		fflush(stdout);
@@ -1486,9 +1492,11 @@ static int stateless_connect(const char *service_name)
 
 	/*
 	 * Dump the capability listing that we got from the server earlier
-	 * during the info/refs request.
+	 * during the info/refs request. This does not work with the
+	 * "git-upload-archive" service.
 	 */
-	write_or_die(rpc.in, discover->buf, discover->len);
+	if (strcmp(service_name, "git-upload-archive"))
+		write_or_die(rpc.in, discover->buf, discover->len);
 
 	/* Until we see EOF keep sending POSTs */
 	while (1) {
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index fc499cdff0..961c6aac25 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -239,4 +239,38 @@ check_zip with_untracked2
 check_added with_untracked2 untracked one/untracked
 check_added with_untracked2 untracked two/untracked
 
+# Test remote archive over HTTP protocol.
+#
+# Note: this should be the last part of this test suite, because
+# by including lib-httpd.sh, the test may end early if httpd tests
+# should not be run.
+#
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success "setup for HTTP protocol" '
+	cp -R bare.git "$HTTPD_DOCUMENT_ROOT_PATH/bare.git" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/bare.git" \
+		config http.uploadpack true &&
+	set_askpass user@host pass@host
+'
+
+setup_askpass_helper
+
+test_expect_success 'remote archive does not work with protocol v1' '
+	test_must_fail git -c protocol.version=1 archive \
+		--remote="$HTTPD_URL/auth/smart/bare.git" \
+		--output=remote-http.zip HEAD >actual 2>&1 &&
+	cat >expect <<-EOF &&
+	fatal: can${SQ}t connect to subservice git-upload-archive
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'archive remote http repository' '
+	git archive --remote="$HTTPD_URL/auth/smart/bare.git" \
+		--output=remote-http.zip HEAD &&
+	test_cmp_bin d.zip remote-http.zip
+'
+
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index 3b036ae1ca..566f7473df 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -628,7 +628,8 @@ static int process_connect_service(struct transport *transport,
 		ret = run_connect(transport, &cmdbuf);
 	} else if (data->stateless_connect &&
 		   (get_protocol_version_config() == protocol_v2) &&
-		   !strcmp("git-upload-pack", name)) {
+		   (!strcmp("git-upload-pack", name) ||
+		    !strcmp("git-upload-archive", name))) {
 		strbuf_addf(&cmdbuf, "stateless-connect %s\n", name);
 		ret = run_connect(transport, &cmdbuf);
 		if (ret)
-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply related

* [PATCH v4 3/4] transport-helper: call do_take_over() in connect_helper
From: Jiang Xin @ 2023-12-14 14:13 UTC (permalink / raw)
  To: Git List, Junio C Hamano; +Cc: Jiang Xin
In-Reply-To: <cover.1702562879.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

After successfully connecting to the smart transport by calling
process_connect_service() in connect_helper(), run do_take_over() to
replace the old vtable with a new one which has methods ready for the
smart transport connection.

The connect_helper() function is used as the connect method of the
vtable in "transport-helper.c", and it is called by transport_connect()
in "transport.c" to setup a connection. The only place that we call
transport_connect() so far is in "builtin/archive.c". Without running
do_take_over(), it may fail to call transport_disconnect() in
run_remote_archiver() of "builtin/archive.c". This is because for a
stateless connection or a service like "git-upload-pack-archive", the
remote helper may receive a SIGPIPE signal and exit early. To have a
graceful disconnect method by calling do_take_over() will solve this
issue.

The subsequent commit will introduce remote archive over a stateless-rpc
connection.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 transport-helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/transport-helper.c b/transport-helper.c
index 51088cc03a..3b036ae1ca 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -672,6 +672,8 @@ static int connect_helper(struct transport *transport, const char *name,
 
 	fd[0] = data->helper->out;
 	fd[1] = data->helper->in;
+
+	do_take_over(transport);
 	return 0;
 }
 
-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply related

* [PATCH v4 2/4] transport-helper: call do_take_over() in process_connect
From: Jiang Xin @ 2023-12-14 14:13 UTC (permalink / raw)
  To: Git List, Junio C Hamano; +Cc: Jiang Xin
In-Reply-To: <cover.1702562879.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

The existing pattern among all callers of process_connect() seems to be

        if (process_connect(...)) {
                do_take_over();
                ... dispatch to the underlying method ...
        }
        ... otherwise implement the fallback ...

where the return value from process_connect() is the return value of the
call it makes to process_connect_service().

Move the call of do_take_over() inside process_connect(), so that
calling the process_connect() function is more concise and will not
miss do_take_over().

Suggested-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 transport-helper.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/transport-helper.c b/transport-helper.c
index 2e127d24a5..51088cc03a 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -645,6 +645,7 @@ static int process_connect(struct transport *transport,
 	struct helper_data *data = transport->data;
 	const char *name;
 	const char *exec;
+	int ret;
 
 	name = for_push ? "git-receive-pack" : "git-upload-pack";
 	if (for_push)
@@ -652,7 +653,10 @@ static int process_connect(struct transport *transport,
 	else
 		exec = data->transport_options.uploadpack;
 
-	return process_connect_service(transport, name, exec);
+	ret = process_connect_service(transport, name, exec);
+	if (ret)
+		do_take_over(transport);
+	return ret;
 }
 
 static int connect_helper(struct transport *transport, const char *name,
@@ -682,10 +686,8 @@ static int fetch_refs(struct transport *transport,
 
 	get_helper(transport);
 
-	if (process_connect(transport, 0)) {
-		do_take_over(transport);
+	if (process_connect(transport, 0))
 		return transport->vtable->fetch_refs(transport, nr_heads, to_fetch);
-	}
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
@@ -1142,10 +1144,8 @@ static int push_refs(struct transport *transport,
 {
 	struct helper_data *data = transport->data;
 
-	if (process_connect(transport, 1)) {
-		do_take_over(transport);
+	if (process_connect(transport, 1))
 		return transport->vtable->push_refs(transport, remote_refs, flags);
-	}
 
 	if (!remote_refs) {
 		fprintf(stderr,
@@ -1186,11 +1186,9 @@ static struct ref *get_refs_list(struct transport *transport, int for_push,
 {
 	get_helper(transport);
 
-	if (process_connect(transport, for_push)) {
-		do_take_over(transport);
+	if (process_connect(transport, for_push))
 		return transport->vtable->get_refs_list(transport, for_push,
 							transport_options);
-	}
 
 	return get_refs_list_using_list(transport, for_push);
 }
@@ -1274,10 +1272,8 @@ static int get_bundle_uri(struct transport *transport)
 {
 	get_helper(transport);
 
-	if (process_connect(transport, 0)) {
-		do_take_over(transport);
+	if (process_connect(transport, 0))
 		return transport->vtable->get_bundle_uri(transport);
-	}
 
 	return -1;
 }
-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply related

* [PATCH v4 1/4] transport-helper: no connection restriction in connect_helper
From: Jiang Xin @ 2023-12-14 14:13 UTC (permalink / raw)
  To: Git List, Junio C Hamano; +Cc: Jiang Xin
In-Reply-To: <cover.1702562879.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

When commit b236752a (Support remote archive from all smart transports,
2009-12-09) added "remote archive" support for "smart transports", it
was for transport that supports the ".connect" method. The
"connect_helper()" function protected itself from getting called for a
transport without the method before calling process_connect_service(),
which did not work with such a transport.

Later, commit edc9caf7 (transport-helper: introduce stateless-connect,
2018-03-15) added a way for a transport without the ".connect" method
to establish a "stateless" connection in protocol-v2, which
process_connect_service() was taught to handle the "stateless"
connection, making the old safety valve in its caller that insisted
that ".connect" method must be defined too strict, and forgot to loosen
it.

Remove the restriction in the "connect_helper()" function and give the
function "process_connect_service()" the opportunity to establish a
connection using ".connect" or ".stateless_connect" for protocol v2. So
we can connect with a stateless-rpc and do something useful. E.g., in a
later commit, implements remote archive for a repository over HTTP
protocol.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 transport-helper.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/transport-helper.c b/transport-helper.c
index 49811ef176..2e127d24a5 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -662,8 +662,6 @@ static int connect_helper(struct transport *transport, const char *name,
 
 	/* Get_helper so connect is inited. */
 	get_helper(transport);
-	if (!data->connect)
-		die(_("operation not supported by protocol"));
 
 	if (!process_connect_service(transport, name, exec))
 		die(_("can't connect to subservice %s"), name);
-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply related

* [PATCH v4 0/4] support remote archive via stateless transport
From: Jiang Xin @ 2023-12-14 14:13 UTC (permalink / raw)
  To: Git List, Junio C Hamano; +Cc: Jiang Xin
In-Reply-To: <cover.1696432593.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

# Change since v3:

1. Update commit message of patch 2/4.
2. Add comments in t5003.

# range-diff v3...v4

1:  1818d8e30e = 1:  d343585cb5 transport-helper: no connection restriction in connect_helper
2:  b57524bc91 ! 2:  65fb67523c transport-helper: call do_take_over() in process_connect
    @@ Commit message
         where the return value from process_connect() is the return value of the
         call it makes to process_connect_service().
     
    -    It is safe to make a refactor by moving the call of do_take_over()
    -    into the function process_connect().
    +    Move the call of do_take_over() inside process_connect(), so that
    +    calling the process_connect() function is more concise and will not
    +    miss do_take_over().
     
         Suggested-by: Junio C Hamano <gitster@pobox.com>
         Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
3:  7ce60e3b9a = 3:  109a1fffde transport-helper: call do_take_over() in connect_helper
4:  626f903508 ! 4:  eb905259fe archive: support remote archive from stateless transport
    @@ Commit message
     
         Helped-by: Eric Sunshine <sunshine@sunshineco.com>
         Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
    @@ t/t5003-archive-zip.sh: check_zip with_untracked2
      check_added with_untracked2 untracked one/untracked
      check_added with_untracked2 untracked two/untracked
      
    ++# Test remote archive over HTTP protocol.
    ++#
    ++# Note: this should be the last part of this test suite, because
    ++# by including lib-httpd.sh, the test may end early if httpd tests
    ++# should not be run.
    ++#
     +. "$TEST_DIRECTORY"/lib-httpd.sh
     +start_httpd
     +
    @@ t/t5003-archive-zip.sh: check_zip with_untracked2
     +setup_askpass_helper
     +
     +test_expect_success 'remote archive does not work with protocol v1' '
    -+	test_when_finished "rm -f d5.zip" &&
     +	test_must_fail git -c protocol.version=1 archive \
     +		--remote="$HTTPD_URL/auth/smart/bare.git" \
    -+		--output=d5.zip HEAD >actual 2>&1 &&
    ++		--output=remote-http.zip HEAD >actual 2>&1 &&
     +	cat >expect <<-EOF &&
     +	fatal: can${SQ}t connect to subservice git-upload-archive
     +	EOF
    @@ t/t5003-archive-zip.sh: check_zip with_untracked2
     +'
     +
     +test_expect_success 'archive remote http repository' '
    -+	test_when_finished "rm -f d5.zip" &&
     +	git archive --remote="$HTTPD_URL/auth/smart/bare.git" \
    -+		--output=d5.zip HEAD &&
    -+	test_cmp_bin d.zip d5.zip
    ++		--output=remote-http.zip HEAD &&
    ++	test_cmp_bin d.zip remote-http.zip
     +'
     +
      test_done

Jiang Xin (4):
  transport-helper: no connection restriction in connect_helper
  transport-helper: call do_take_over() in process_connect
  transport-helper: call do_take_over() in connect_helper
  archive: support remote archive from stateless transport

 http-backend.c         | 15 +++++++++++----
 remote-curl.c          | 14 +++++++++++---
 t/t5003-archive-zip.sh | 34 ++++++++++++++++++++++++++++++++++
 transport-helper.c     | 29 +++++++++++++----------------
 4 files changed, 69 insertions(+), 23 deletions(-)

-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply

* [PATCH v3 4/4] bisect: consistently write BISECT_EXPECTED_REV via the refdb
From: Patrick Steinhardt @ 2023-12-14 13:37 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Phillip Wood, Ramsay Jones
In-Reply-To: <cover.1702560829.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 5649 bytes --]

We're inconsistently writing BISECT_EXPECTED_REV both via the filesystem
and via the refdb, which violates the newly established rules for how
special refs must be treated. This works alright in practice with the
reffiles reference backend, but will cause bugs once we gain additional
backends.

Fix this issue and consistently write BISECT_EXPECTED_REV via the refdb
so that it is no longer a special ref.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 bisect.c                    | 25 ++++---------------------
 builtin/bisect.c            |  8 ++------
 refs.c                      |  3 ++-
 t/t6030-bisect-porcelain.sh |  2 +-
 4 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/bisect.c b/bisect.c
index 1be8e0a271..985b96ed13 100644
--- a/bisect.c
+++ b/bisect.c
@@ -471,7 +471,6 @@ static int read_bisect_refs(void)
 }
 
 static GIT_PATH_FUNC(git_path_bisect_names, "BISECT_NAMES")
-static GIT_PATH_FUNC(git_path_bisect_expected_rev, "BISECT_EXPECTED_REV")
 static GIT_PATH_FUNC(git_path_bisect_ancestors_ok, "BISECT_ANCESTORS_OK")
 static GIT_PATH_FUNC(git_path_bisect_run, "BISECT_RUN")
 static GIT_PATH_FUNC(git_path_bisect_start, "BISECT_START")
@@ -707,26 +706,10 @@ static enum bisect_error error_if_skipped_commits(struct commit_list *tried,
 
 static int is_expected_rev(const struct object_id *oid)
 {
-	const char *filename = git_path_bisect_expected_rev();
-	struct stat st;
-	struct strbuf str = STRBUF_INIT;
-	FILE *fp;
-	int res = 0;
-
-	if (stat(filename, &st) || !S_ISREG(st.st_mode))
+	struct object_id expected_oid;
+	if (read_ref("BISECT_EXPECTED_REV", &expected_oid))
 		return 0;
-
-	fp = fopen_or_warn(filename, "r");
-	if (!fp)
-		return 0;
-
-	if (strbuf_getline_lf(&str, fp) != EOF)
-		res = !strcmp(str.buf, oid_to_hex(oid));
-
-	strbuf_release(&str);
-	fclose(fp);
-
-	return res;
+	return oideq(oid, &expected_oid);
 }
 
 enum bisect_error bisect_checkout(const struct object_id *bisect_rev,
@@ -1185,10 +1168,10 @@ int bisect_clean_state(void)
 	struct string_list refs_for_removal = STRING_LIST_INIT_NODUP;
 	for_each_ref_in("refs/bisect", mark_for_removal, (void *) &refs_for_removal);
 	string_list_append(&refs_for_removal, xstrdup("BISECT_HEAD"));
+	string_list_append(&refs_for_removal, xstrdup("BISECT_EXPECTED_REV"));
 	result = delete_refs("bisect: remove", &refs_for_removal, REF_NO_DEREF);
 	refs_for_removal.strdup_strings = 1;
 	string_list_clear(&refs_for_removal, 0);
-	unlink_or_warn(git_path_bisect_expected_rev());
 	unlink_or_warn(git_path_bisect_ancestors_ok());
 	unlink_or_warn(git_path_bisect_log());
 	unlink_or_warn(git_path_bisect_names());
diff --git a/builtin/bisect.c b/builtin/bisect.c
index 35938b05fd..4e2c43daf5 100644
--- a/builtin/bisect.c
+++ b/builtin/bisect.c
@@ -17,7 +17,6 @@
 #include "revision.h"
 
 static GIT_PATH_FUNC(git_path_bisect_terms, "BISECT_TERMS")
-static GIT_PATH_FUNC(git_path_bisect_expected_rev, "BISECT_EXPECTED_REV")
 static GIT_PATH_FUNC(git_path_bisect_ancestors_ok, "BISECT_ANCESTORS_OK")
 static GIT_PATH_FUNC(git_path_bisect_start, "BISECT_START")
 static GIT_PATH_FUNC(git_path_bisect_log, "BISECT_LOG")
@@ -921,7 +920,6 @@ static enum bisect_error bisect_state(struct bisect_terms *terms, int argc,
 	const char *state;
 	int i, verify_expected = 1;
 	struct object_id oid, expected;
-	struct strbuf buf = STRBUF_INIT;
 	struct oid_array revs = OID_ARRAY_INIT;
 
 	if (!argc)
@@ -976,10 +974,8 @@ static enum bisect_error bisect_state(struct bisect_terms *terms, int argc,
 		oid_array_append(&revs, &commit->object.oid);
 	}
 
-	if (strbuf_read_file(&buf, git_path_bisect_expected_rev(), 0) < the_hash_algo->hexsz ||
-	    get_oid_hex(buf.buf, &expected) < 0)
+	if (read_ref("BISECT_EXPECTED_REV", &expected))
 		verify_expected = 0; /* Ignore invalid file contents */
-	strbuf_release(&buf);
 
 	for (i = 0; i < revs.nr; i++) {
 		if (bisect_write(state, oid_to_hex(&revs.oid[i]), terms, 0)) {
@@ -988,7 +984,7 @@ static enum bisect_error bisect_state(struct bisect_terms *terms, int argc,
 		}
 		if (verify_expected && !oideq(&revs.oid[i], &expected)) {
 			unlink_or_warn(git_path_bisect_ancestors_ok());
-			unlink_or_warn(git_path_bisect_expected_rev());
+			delete_ref(NULL, "BISECT_EXPECTED_REV", NULL, REF_NO_DEREF);
 			verify_expected = 0;
 		}
 	}
diff --git a/refs.c b/refs.c
index 8fe34d51e4..c76ce86bef 100644
--- a/refs.c
+++ b/refs.c
@@ -1840,6 +1840,8 @@ static int is_special_ref(const char *refname)
 	 * There are some exceptions that you might expect to see on this list
 	 * but which are handled exclusively via the reference backend:
 	 *
+	 * - BISECT_EXPECTED_REV
+	 *
 	 * - CHERRY_PICK_HEAD
 	 *
 	 * - HEAD
@@ -1857,7 +1859,6 @@ static int is_special_ref(const char *refname)
 	 */
 	static const char * const special_refs[] = {
 		"AUTO_MERGE",
-		"BISECT_EXPECTED_REV",
 		"FETCH_HEAD",
 		"MERGE_AUTOSTASH",
 		"MERGE_HEAD",
diff --git a/t/t6030-bisect-porcelain.sh b/t/t6030-bisect-porcelain.sh
index 2a5b7d8379..792c1504bc 100755
--- a/t/t6030-bisect-porcelain.sh
+++ b/t/t6030-bisect-porcelain.sh
@@ -1176,7 +1176,7 @@ test_expect_success 'git bisect reset cleans bisection state properly' '
 	git bisect bad $HASH4 &&
 	git bisect reset &&
 	test -z "$(git for-each-ref "refs/bisect/*")" &&
-	test_path_is_missing ".git/BISECT_EXPECTED_REV" &&
+	test_ref_missing BISECT_EXPECTED_REV &&
 	test_path_is_missing ".git/BISECT_ANCESTORS_OK" &&
 	test_path_is_missing ".git/BISECT_LOG" &&
 	test_path_is_missing ".git/BISECT_RUN" &&
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH v3 3/4] refs: complete list of special refs
From: Patrick Steinhardt @ 2023-12-14 13:37 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Phillip Wood, Ramsay Jones
In-Reply-To: <cover.1702560829.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 4342 bytes --]

We have some references that are more special than others. The reason
for them being special is that they either do not follow the usual
format of references, or that they are written to the filesystem
directly by the respective owning subsystem and thus circumvent the
reference backend.

This works perfectly fine right now because the reffiles backend will
know how to read those refs just fine. But with the prospect of gaining
a new reference backend implementation we need to be a lot more careful
here:

  - We need to make sure that we are consistent about how those refs are
    written. They must either always be written via the filesystem, or
    they must always be written via the reference backend. Any mixture
    will lead to inconsistent state.

  - We need to make sure that such special refs are always handled
    specially when reading them.

We're already mostly good with regard to the first item, except for
`BISECT_EXPECTED_REV` which will be addressed in a subsequent commit.
But the current list of special refs is missing some refs that really
should be treated specially. Right now, we only treat `FETCH_HEAD` and
`MERGE_HEAD` specially here.

Introduce a new function `is_special_ref()` that contains all current
instances of special refs to fix the reading path.

Note that this is only a temporary measure where we record and rectify
the current state. Ideally, the list of special refs should in the end
only contain `FETCH_HEAD` and `MERGE_HEAD` again because they both may
reference multiple objects and can contain annotations, so they indeed
are special.

Based-on-patch-by: Han-Wen Nienhuys <hanwenn@gmail.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/refs.c b/refs.c
index 00e72a2abf..8fe34d51e4 100644
--- a/refs.c
+++ b/refs.c
@@ -1820,15 +1820,65 @@ static int refs_read_special_head(struct ref_store *ref_store,
 	return result;
 }
 
+static int is_special_ref(const char *refname)
+{
+	/*
+	 * Special references get written and read directly via the filesystem
+	 * by the subsystems that create them. Thus, they must not go through
+	 * the reference backend but must instead be read directly. It is
+	 * arguable whether this behaviour is sensible, or whether it's simply
+	 * a leaky abstraction enabled by us only having a single reference
+	 * backend implementation. But at least for a subset of references it
+	 * indeed does make sense to treat them specially:
+	 *
+	 * - FETCH_HEAD may contain multiple object IDs, and each one of them
+	 *   carries additional metadata like where it came from.
+	 *
+	 * - MERGE_HEAD may contain multiple object IDs when merging multiple
+	 *   heads.
+	 *
+	 * There are some exceptions that you might expect to see on this list
+	 * but which are handled exclusively via the reference backend:
+	 *
+	 * - CHERRY_PICK_HEAD
+	 *
+	 * - HEAD
+	 *
+	 * - ORIG_HEAD
+	 *
+	 * - "rebase-apply/" and "rebase-merge/" contain all of the state for
+	 *   rebases, including some reference-like files. These are
+	 *   exclusively read and written via the filesystem and never go
+	 *   through the refdb.
+	 *
+	 * Writing or deleting references must consistently go either through
+	 * the filesystem (special refs) or through the reference backend
+	 * (normal ones).
+	 */
+	static const char * const special_refs[] = {
+		"AUTO_MERGE",
+		"BISECT_EXPECTED_REV",
+		"FETCH_HEAD",
+		"MERGE_AUTOSTASH",
+		"MERGE_HEAD",
+	};
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(special_refs); i++)
+		if (!strcmp(refname, special_refs[i]))
+			return 1;
+
+	return 0;
+}
+
 int refs_read_raw_ref(struct ref_store *ref_store, const char *refname,
 		      struct object_id *oid, struct strbuf *referent,
 		      unsigned int *type, int *failure_errno)
 {
 	assert(failure_errno);
-	if (!strcmp(refname, "FETCH_HEAD") || !strcmp(refname, "MERGE_HEAD")) {
+	if (is_special_ref(refname))
 		return refs_read_special_head(ref_store, refname, oid, referent,
 					      type, failure_errno);
-	}
 
 	return ref_store->be->read_raw_ref(ref_store, refname, oid, referent,
 					   type, failure_errno);
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH v3 2/4] refs: propagate errno when reading special refs fails
From: Patrick Steinhardt @ 2023-12-14 13:37 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Phillip Wood, Ramsay Jones
In-Reply-To: <cover.1702560829.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 1787 bytes --]

Some refs in Git are more special than others due to reasons explained
in the next commit. These refs are read via `refs_read_special_head()`,
but this function doesn't behave the same as when we try to read a
normal ref. Most importantly, we do not propagate `failure_errno` in the
case where the reference does not exist, which is behaviour that we rely
on in many parts of Git.

Fix this bug by propagating errno when `strbuf_read_file()` fails.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs.c              |  4 +++-
 t/t1403-show-ref.sh | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/refs.c b/refs.c
index fcae5dddc6..00e72a2abf 100644
--- a/refs.c
+++ b/refs.c
@@ -1806,8 +1806,10 @@ static int refs_read_special_head(struct ref_store *ref_store,
 	int result = -1;
 	strbuf_addf(&full_path, "%s/%s", ref_store->gitdir, refname);
 
-	if (strbuf_read_file(&content, full_path.buf, 0) < 0)
+	if (strbuf_read_file(&content, full_path.buf, 0) < 0) {
+		*failure_errno = errno;
 		goto done;
+	}
 
 	result = parse_loose_ref_contents(content.buf, oid, referent, type,
 					  failure_errno);
diff --git a/t/t1403-show-ref.sh b/t/t1403-show-ref.sh
index b50ae6fcf1..66e6e77fa9 100755
--- a/t/t1403-show-ref.sh
+++ b/t/t1403-show-ref.sh
@@ -266,4 +266,14 @@ test_expect_success '--exists with directory fails with generic error' '
 	test_cmp expect err
 '
 
+test_expect_success '--exists with non-existent special ref' '
+	test_expect_code 2 git show-ref --exists FETCH_HEAD
+'
+
+test_expect_success '--exists with existing special ref' '
+	test_when_finished "rm .git/FETCH_HEAD" &&
+	git rev-parse HEAD >.git/FETCH_HEAD &&
+	git show-ref --exists FETCH_HEAD
+'
+
 test_done
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH v3 1/4] wt-status: read HEAD and ORIG_HEAD via the refdb
From: Patrick Steinhardt @ 2023-12-14 13:36 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Phillip Wood, Ramsay Jones
In-Reply-To: <cover.1702560829.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 2948 bytes --]

We read both the HEAD and ORIG_HEAD references directly from the
filesystem in order to figure out whether we're currently splitting a
commit. If both of the following are true:

  - HEAD points to the same object as "rebase-merge/amend".

  - ORIG_HEAD points to the same object as "rebase-merge/orig-head".

Then we are currently splitting commits.

The current code only works by chance because we only have a single
reference backend implementation. Refactor it to instead read both refs
via the refdb layer so that we'll also be compatible with alternate
reference backends.

There are some subtleties involved here:

  - We pass `RESOLVE_REF_READING` so that a missing ref will cause
    `read_ref_full()` to return an error.

  - We pass `RESOLVE_REF_NO_RECURSE` so that we do not try to resolve
    symrefs. The old code didn't resolve symrefs either, and we only
    ever write object IDs into the refs in "rebase-merge/".

  - In the same spirit we verify that successfully-read refs are not
    symbolic refs.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 wt-status.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/wt-status.c b/wt-status.c
index 9f45bf6949..da19923981 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1295,26 +1295,32 @@ static char *read_line_from_git_path(const char *filename)
 static int split_commit_in_progress(struct wt_status *s)
 {
 	int split_in_progress = 0;
-	char *head, *orig_head, *rebase_amend, *rebase_orig_head;
+	struct object_id head_oid, orig_head_oid;
+	char *rebase_amend, *rebase_orig_head;
+	int head_flags, orig_head_flags;
 
 	if ((!s->amend && !s->nowarn && !s->workdir_dirty) ||
 	    !s->branch || strcmp(s->branch, "HEAD"))
 		return 0;
 
-	head = read_line_from_git_path("HEAD");
-	orig_head = read_line_from_git_path("ORIG_HEAD");
+	if (read_ref_full("HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
+			  &head_oid, &head_flags) ||
+	    read_ref_full("ORIG_HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
+			  &orig_head_oid, &orig_head_flags))
+		return 0;
+	if (head_flags & REF_ISSYMREF || orig_head_flags & REF_ISSYMREF)
+		return 0;
+
 	rebase_amend = read_line_from_git_path("rebase-merge/amend");
 	rebase_orig_head = read_line_from_git_path("rebase-merge/orig-head");
 
-	if (!head || !orig_head || !rebase_amend || !rebase_orig_head)
+	if (!rebase_amend || !rebase_orig_head)
 		; /* fall through, no split in progress */
 	else if (!strcmp(rebase_amend, rebase_orig_head))
-		split_in_progress = !!strcmp(head, rebase_amend);
-	else if (strcmp(orig_head, rebase_orig_head))
+		split_in_progress = !!strcmp(oid_to_hex(&head_oid), rebase_amend);
+	else if (strcmp(oid_to_hex(&orig_head_oid), rebase_orig_head))
 		split_in_progress = 1;
 
-	free(head);
-	free(orig_head);
 	free(rebase_amend);
 	free(rebase_orig_head);
 
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* [PATCH v3 0/4] refs: improve handling of special refs
From: Patrick Steinhardt @ 2023-12-14 13:36 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Phillip Wood, Ramsay Jones
In-Reply-To: <cover.1701243201.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 4913 bytes --]

Hi,

this is the third version of my patch series to improve handling of
special refs.

Changes combpared to v2:

  - Patch 1: We're now more careful around missing and symbolic refs.

  - Patch 3: Document in the commit message that the extended list of
    special refs is not intended to stay like this forever.

Thanks for your reviews and the discussion!

Patrick

Patrick Steinhardt (4):
  wt-status: read HEAD and ORIG_HEAD via the refdb
  refs: propagate errno when reading special refs fails
  refs: complete list of special refs
  bisect: consistently write BISECT_EXPECTED_REV via the refdb

 bisect.c                    | 25 +++-------------
 builtin/bisect.c            |  8 ++---
 refs.c                      | 59 +++++++++++++++++++++++++++++++++++--
 t/t1403-show-ref.sh         | 10 +++++++
 t/t6030-bisect-porcelain.sh |  2 +-
 wt-status.c                 | 22 +++++++++-----
 6 files changed, 87 insertions(+), 39 deletions(-)

Range-diff against v2:
1:  1db3eb3945 ! 1:  aea9410bd9 wt-status: read HEAD and ORIG_HEAD via the refdb
    @@ Commit message
         via the refdb layer so that we'll also be compatible with alternate
         reference backends.
     
    -    Note that we pass `RESOLVE_REF_NO_RECURSE` to `read_ref_full()`. This is
    -    because we didn't resolve symrefs before either, and in practice none of
    -    the refs in "rebase-merge/" would be symbolic. We thus don't want to
    -    resolve symrefs with the new code either to retain the old behaviour.
    +    There are some subtleties involved here:
    +
    +      - We pass `RESOLVE_REF_READING` so that a missing ref will cause
    +        `read_ref_full()` to return an error.
    +
    +      - We pass `RESOLVE_REF_NO_RECURSE` so that we do not try to resolve
    +        symrefs. The old code didn't resolve symrefs either, and we only
    +        ever write object IDs into the refs in "rebase-merge/".
    +
    +      - In the same spirit we verify that successfully-read refs are not
    +        symbolic refs.
     
         Signed-off-by: Patrick Steinhardt <ps@pks.im>
     
    @@ wt-status.c: static char *read_line_from_git_path(const char *filename)
     -	char *head, *orig_head, *rebase_amend, *rebase_orig_head;
     +	struct object_id head_oid, orig_head_oid;
     +	char *rebase_amend, *rebase_orig_head;
    ++	int head_flags, orig_head_flags;
      
      	if ((!s->amend && !s->nowarn && !s->workdir_dirty) ||
      	    !s->branch || strcmp(s->branch, "HEAD"))
    @@ wt-status.c: static char *read_line_from_git_path(const char *filename)
      
     -	head = read_line_from_git_path("HEAD");
     -	orig_head = read_line_from_git_path("ORIG_HEAD");
    -+	if (read_ref_full("HEAD", RESOLVE_REF_NO_RECURSE, &head_oid, NULL) ||
    -+	    read_ref_full("ORIG_HEAD", RESOLVE_REF_NO_RECURSE, &orig_head_oid, NULL))
    ++	if (read_ref_full("HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
    ++			  &head_oid, &head_flags) ||
    ++	    read_ref_full("ORIG_HEAD", RESOLVE_REF_READING | RESOLVE_REF_NO_RECURSE,
    ++			  &orig_head_oid, &orig_head_flags))
    ++		return 0;
    ++	if (head_flags & REF_ISSYMREF || orig_head_flags & REF_ISSYMREF)
     +		return 0;
     +
      	rebase_amend = read_line_from_git_path("rebase-merge/amend");
2:  24032a62e5 = 2:  455ab69177 refs: propagate errno when reading special refs fails
3:  3dd9089fd5 ! 3:  81ac092281 refs: complete list of special refs
    @@ Commit message
     
         We're already mostly good with regard to the first item, except for
         `BISECT_EXPECTED_REV` which will be addressed in a subsequent commit.
    -    But the current list of special refs is missing a lot of refs that
    -    really should be treated specially. Right now, we only treat
    -    `FETCH_HEAD` and `MERGE_HEAD` specially here.
    +    But the current list of special refs is missing some refs that really
    +    should be treated specially. Right now, we only treat `FETCH_HEAD` and
    +    `MERGE_HEAD` specially here.
     
         Introduce a new function `is_special_ref()` that contains all current
         instances of special refs to fix the reading path.
     
    +    Note that this is only a temporary measure where we record and rectify
    +    the current state. Ideally, the list of special refs should in the end
    +    only contain `FETCH_HEAD` and `MERGE_HEAD` again because they both may
    +    reference multiple objects and can contain annotations, so they indeed
    +    are special.
    +
         Based-on-patch-by: Han-Wen Nienhuys <hanwenn@gmail.com>
         Signed-off-by: Patrick Steinhardt <ps@pks.im>
     
4:  4a4447a3f5 = 4:  3244678161 bisect: consistently write BISECT_EXPECTED_REV via the refdb

base-commit: 1a87c842ece327d03d08096395969aca5e0a6996
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH v2 1/4] wt-status: read HEAD and ORIG_HEAD via the refdb
From: Patrick Steinhardt @ 2023-12-14 13:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Taylor Blau, Phillip Wood
In-Reply-To: <xmqqle9zqidj.fsf@gitster.g>

[-- Attachment #1: Type: text/plain, Size: 3621 bytes --]

On Tue, Dec 12, 2023 at 12:24:24PM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
[snip]
> > diff --git a/wt-status.c b/wt-status.c
> > index 9f45bf6949..fe9e590b80 100644
> > --- a/wt-status.c
> > +++ b/wt-status.c
> > @@ -1295,26 +1295,27 @@ static char *read_line_from_git_path(const char *filename)
> >  static int split_commit_in_progress(struct wt_status *s)
> >  {
> >  	int split_in_progress = 0;
> > -	char *head, *orig_head, *rebase_amend, *rebase_orig_head;
> > +	struct object_id head_oid, orig_head_oid;
> > +	char *rebase_amend, *rebase_orig_head;
> >  
> >  	if ((!s->amend && !s->nowarn && !s->workdir_dirty) ||
> >  	    !s->branch || strcmp(s->branch, "HEAD"))
> >  		return 0;
> >  
> > -	head = read_line_from_git_path("HEAD");
> > -	orig_head = read_line_from_git_path("ORIG_HEAD");
> > +	if (read_ref_full("HEAD", RESOLVE_REF_NO_RECURSE, &head_oid, NULL) ||
> > +	    read_ref_full("ORIG_HEAD", RESOLVE_REF_NO_RECURSE, &orig_head_oid, NULL))
> > +		return 0;
> > +
> 
> This made me wonder if we have changed behaviour when on an unborn
> branch.  In such a case, the original most likely would have read
> "ref: blah" in "head" and compared it with "rebase_amend", which
> would be a good way to ensure they would not match.  I would not
> know offhand what the updated code would do, but head_oid would be
> uninitialized in such a case, so ...?

The code flow goes as following:

  1. We call `read_ref_full()`, which itself calls
     `refs_resolve_ref_unsafe()`.

  2. It calls `refs_read_raw_ref()`, which succeeds and finds the
     symref.

  3. We notice that this is a symref and that `RESOLVE_REF_NO_RECURSE`
     is set. We thus clear the object ID and return the name of the
     symref target.

  4. Back in `read_ref_full()` we see that `refs_resolve_ref_unsafe()`
     returns the symref target, which we interpret as successful lookup.
     We thus return `0`.

  5. Now we look up "rebase-merge/{amend,orig-head}" and end up
     comparing whatever they contain with the cleared OID resolved from
     HEAD/ORIG_HEAD.

So the OID would not be uninitialized but the zero OID. Now:

  - "rebase-merge/amend" always contains the result of `repo_get_oid()`
    and never contains the zero OID.

  - "rebase-merge/orig-head" may contain the zero OID when there was no
    ORIG_HEAD at the time of starting a rebase or in case it did not
    resolve

So... if ORIG_HEAD was rewritten to be a symref pointing into nirvana
between starting the rebase and calling into "wt-status.c", and when
ORIG_HEAD didn't exist at the time of starting the rebase, then we might
now wrongly report that splitting was in progress.

In other cases the old code was actually doing the wrong thing. Suppose
that ORIG_HEAD was a dangling symref, then we'd have written the zero
OID into "rebase-merge/orig-head". But when calling into "wt-status.c"
now we read the still-dangling symref value and notice that the zero OID
is different than "ref: refs/heads/dangling".

I dunno. It feels like this is one of many cases where as you start to
think deeply about how things behave you realize that it's been broken
all along. On the other hand, I doubt there was even a single user who
ever experienced this issue. It often just needs to be correct enough.

I think the best way to go about this is to check for `REF_ISSSYMREF`
and exit early in that case. We only want to compare direct refs, so
this is closer to the old behaviour and should even fix edge cases like
the above.

Will update.

Patrick

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* completing an existing patch
From: Marzi Esipreh @ 2023-12-14 13:20 UTC (permalink / raw)
  To: git

Hi all,
I hope you are well.
I'm working in a company where most of our developers are using linux
as a development environment, so the performance of git on linux
platforms is important for us.
We came across this PR: https://github.com/git/git/pull/1352 that is
improving git status performance on linux platforms, we tried it out,
and we are happy with the result.
I was in contact with the author of this patch, and I addressed the PR
comments as well.
Please let me know how I can proceed? Shall I create a new fresh PR,
and refer to existing one in the descriptions?
Best regards,
Marzi

^ permalink raw reply

* Re: [PATCH] Use ^=1 to toggle between 0 and 1
From: René Scharfe @ 2023-12-14 13:08 UTC (permalink / raw)
  To: Jeff King; +Cc: AtariDreams via GitGitGadget, git, Seija Kijin
In-Reply-To: <20231213080143.GA1684525@coredump.intra.peff.net>

Am 13.12.23 um 09:01 schrieb Jeff King:
> On Tue, Dec 12, 2023 at 11:30:03PM +0100, René Scharfe wrote:
>
>> I wonder if
>> it's time to use the C99 type _Bool in our code.  It would allow
>> documenting that only two possible values exist in cases like the one
>> above.  That would be even more useful for function returns, I assume.

> I don't even know that we'd need much of a weather-balloon patch. I
> think it would be valid to do:
>
>   #ifndef bool
>   #define bool int
>
> to handle pre-C99 compilers (if there even are any these days). Of
> course we probably need some conditional magic to try to "#include
> <stdbool.h>" for the actual C99. I guess we could assume C99 by default
> and then add NO_STDBOOL as an escape hatch if anybody complains.

The semantics are slightly different in edge cases, so that fallback
would not be fully watertight.  E.g. consider:

   bool b(bool cond) {return cond == true;}
   bool b2(void) {return b(2);}

b() returns false if you give it false and true for anything else. b2()
returns true.

With int as the fallback this becomes:

   int b(int cond) {return cond == 1;}
   int b2(void) {return b(2);}

Now only 1 is recognized as true, b2() returns 0 (false).

A coding rule to not compare bools could mitigate that.  Or a rule to
only use the values true and false in bool context and to only use
logical operators on them.

René

^ permalink raw reply

* [PATCH v2 2/2] fetch: no redundant error message for atomic fetch
From: Jiang Xin @ 2023-12-14 12:33 UTC (permalink / raw)
  To: Git List, Junio C Hamano, Patrick Steinhardt; +Cc: Jiang Xin
In-Reply-To: <cover.1702556642.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

If an error occurs during an atomic fetch, a redundant error message
will appear at the end of do_fetch(). It was introduced in b3a804663c
(fetch: make `--atomic` flag cover backfilling of tags, 2022-02-17).

In function do_fetch(), a failure message is already shown before the
retcode is set, so we should not call additional error() at the end of
this function.

We can remove the redundant error() function, because we know that
the function ref_transaction_abort() never fails. While we can find a
common pattern for calling ref_transaction_abort() by running command
"git grep -A1 ref_transaction_abort", e.g.:

    if (ref_transaction_abort(transaction, &error))
        error("abort: %s", error.buf);

We can fix this issue follow this pattern, and the test case "fetch
porcelain output (atomic)" in t5574 will also be fixed. If in the future
we decide that we don't need to check the return value of the function
ref_transaction_abort(), this change can be fixed along with it.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 builtin/fetch.c         | 4 +---
 t/t5574-fetch-output.sh | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index fd134ba74d..01a573cf8d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1775,10 +1775,8 @@ static int do_fetch(struct transport *transport,
 	}
 
 cleanup:
-	if (retcode && transaction) {
-		ref_transaction_abort(transaction, &err);
+	if (retcode && transaction && ref_transaction_abort(transaction, &err))
 		error("%s", err.buf);
-	}
 
 	display_state_release(&display_state);
 	close_fetch_head(&fetch_head);
diff --git a/t/t5574-fetch-output.sh b/t/t5574-fetch-output.sh
index bc747efefc..8d01e36b3d 100755
--- a/t/t5574-fetch-output.sh
+++ b/t/t5574-fetch-output.sh
@@ -98,7 +98,7 @@ do
 		opt=
 		;;
 	esac
-	test_expect_failure "fetch porcelain output ${opt:+(atomic)}" '
+	test_expect_success "fetch porcelain output ${opt:+(atomic)}" '
 		test_when_finished "rm -rf porcelain" &&
 
 		# Clone and pre-seed the repositories. We fetch references into two
-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply related

* [PATCH v2 1/2] t5574: test porcelain output of atomic fetch
From: Jiang Xin @ 2023-12-14 12:33 UTC (permalink / raw)
  To: Git List, Junio C Hamano, Patrick Steinhardt; +Cc: Jiang Xin
In-Reply-To: <cover.1702556642.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

The test case "fetch porcelain output" checks output of the fetch
command. The error output must be empty with the follow assertion:

    test_must_be_empty stderr

But this assertion fails if using atomic fetch. Refactor this test case
to use different fetch options by splitting it into three test cases.

  1. "setup for fetch porcelain output".

  2. "fetch porcelain output": for non-atomic fetch.

  3. "fetch porcelain output (atomic)": for atomic fetch.

Add new command "test_commit ..." in the first test case, so that if we
run these test cases individually (--run=4-6), "git rev-parse HEAD~"
command will work properly. Run the above test cases, we can find that
one test case has a known breakage, as shown below:

    ok 4 - setup for fetch porcelain output
    ok 5 - fetch porcelain output  # TODO known breakage vanished
    not ok 6 - fetch porcelain output (atomic) # TODO known breakage

The failed test case has an error message with only the error prompt but
no message body, as follows:

    'stderr' is not empty, it contains:
    error:

In a later commit, we will fix this issue.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5574-fetch-output.sh | 97 ++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/t/t5574-fetch-output.sh b/t/t5574-fetch-output.sh
index 90e6dcb9a7..bc747efefc 100755
--- a/t/t5574-fetch-output.sh
+++ b/t/t5574-fetch-output.sh
@@ -61,11 +61,10 @@ test_expect_success 'fetch compact output' '
 	test_cmp expect actual
 '
 
-test_expect_success 'fetch porcelain output' '
-	test_when_finished "rm -rf porcelain" &&
-
+test_expect_success 'setup for fetch porcelain output' '
 	# Set up a bunch of references that we can use to demonstrate different
 	# kinds of flag symbols in the output format.
+	test_commit commit-for-porcelain-output &&
 	MAIN_OLD=$(git rev-parse HEAD) &&
 	git branch "fast-forward" &&
 	git branch "deleted-branch" &&
@@ -74,15 +73,10 @@ test_expect_success 'fetch porcelain output' '
 	FORCE_UPDATED_OLD=$(git rev-parse HEAD) &&
 	git checkout main &&
 
-	# Clone and pre-seed the repositories. We fetch references into two
-	# namespaces so that we can test that rejected and force-updated
-	# references are reported properly.
-	refspecs="refs/heads/*:refs/unforced/* +refs/heads/*:refs/forced/*" &&
-	git clone . porcelain &&
-	git -C porcelain fetch origin $refspecs &&
+	# Backup to preseed.git
+	git clone --mirror . preseed.git &&
 
-	# Now that we have set up the client repositories we can change our
-	# local references.
+	# Continue changing our local references.
 	git branch new-branch &&
 	git branch -d deleted-branch &&
 	git checkout fast-forward &&
@@ -91,36 +85,61 @@ test_expect_success 'fetch porcelain output' '
 	git checkout force-updated &&
 	git reset --hard HEAD~ &&
 	test_commit --no-tag force-update-new &&
-	FORCE_UPDATED_NEW=$(git rev-parse HEAD) &&
-
-	cat >expect <<-EOF &&
-	- $MAIN_OLD $ZERO_OID refs/forced/deleted-branch
-	- $MAIN_OLD $ZERO_OID refs/unforced/deleted-branch
-	  $MAIN_OLD $FAST_FORWARD_NEW refs/unforced/fast-forward
-	! $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/unforced/force-updated
-	* $ZERO_OID $MAIN_OLD refs/unforced/new-branch
-	  $MAIN_OLD $FAST_FORWARD_NEW refs/forced/fast-forward
-	+ $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/forced/force-updated
-	* $ZERO_OID $MAIN_OLD refs/forced/new-branch
-	  $MAIN_OLD $FAST_FORWARD_NEW refs/remotes/origin/fast-forward
-	+ $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/remotes/origin/force-updated
-	* $ZERO_OID $MAIN_OLD refs/remotes/origin/new-branch
-	EOF
-
-	# Execute a dry-run fetch first. We do this to assert that the dry-run
-	# and non-dry-run fetches produces the same output. Execution of the
-	# fetch is expected to fail as we have a rejected reference update.
-	test_must_fail git -C porcelain fetch \
-		--porcelain --dry-run --prune origin $refspecs >actual &&
-	test_cmp expect actual &&
-
-	# And now we perform a non-dry-run fetch.
-	test_must_fail git -C porcelain fetch \
-		--porcelain --prune origin $refspecs >actual 2>stderr &&
-	test_cmp expect actual &&
-	test_must_be_empty stderr
+	FORCE_UPDATED_NEW=$(git rev-parse HEAD)
 '
 
+for opt in off on
+do
+	case $opt in
+	on)
+		opt=--atomic
+		;;
+	off)
+		opt=
+		;;
+	esac
+	test_expect_failure "fetch porcelain output ${opt:+(atomic)}" '
+		test_when_finished "rm -rf porcelain" &&
+
+		# Clone and pre-seed the repositories. We fetch references into two
+		# namespaces so that we can test that rejected and force-updated
+		# references are reported properly.
+		refspecs="refs/heads/*:refs/unforced/* +refs/heads/*:refs/forced/*" &&
+		git clone preseed.git porcelain &&
+		git -C porcelain fetch origin $opt $refspecs &&
+
+		cat >expect <<-EOF &&
+		- $MAIN_OLD $ZERO_OID refs/forced/deleted-branch
+		- $MAIN_OLD $ZERO_OID refs/unforced/deleted-branch
+		  $MAIN_OLD $FAST_FORWARD_NEW refs/unforced/fast-forward
+		! $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/unforced/force-updated
+		* $ZERO_OID $MAIN_OLD refs/unforced/new-branch
+		  $MAIN_OLD $FAST_FORWARD_NEW refs/forced/fast-forward
+		+ $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/forced/force-updated
+		* $ZERO_OID $MAIN_OLD refs/forced/new-branch
+		  $MAIN_OLD $FAST_FORWARD_NEW refs/remotes/origin/fast-forward
+		+ $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/remotes/origin/force-updated
+		* $ZERO_OID $MAIN_OLD refs/remotes/origin/new-branch
+		EOF
+
+		# Change the URL of the repository to fetch different references.
+		git -C porcelain remote set-url origin .. &&
+
+		# Execute a dry-run fetch first. We do this to assert that the dry-run
+		# and non-dry-run fetches produces the same output. Execution of the
+		# fetch is expected to fail as we have a rejected reference update.
+		test_must_fail git -C porcelain fetch $opt \
+			--porcelain --dry-run --prune origin $refspecs >actual &&
+		test_cmp expect actual &&
+
+		# And now we perform a non-dry-run fetch.
+		test_must_fail git -C porcelain fetch $opt \
+			--porcelain --prune origin $refspecs >actual 2>stderr &&
+		test_cmp expect actual &&
+		test_must_be_empty stderr
+	'
+done
+
 test_expect_success 'fetch porcelain with multiple remotes' '
 	test_when_finished "rm -rf porcelain" &&
 
-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply related

* [PATCH v2 0/2] jx/fetch-atomic-error-message-fix
From: Jiang Xin @ 2023-12-14 12:33 UTC (permalink / raw)
  To: Git List, Junio C Hamano, Patrick Steinhardt; +Cc: Jiang Xin
In-Reply-To: <38b0b22038399265407f7fc5f126f471dcc6f1a3.1697725898.git.zhiyou.jx@alibaba-inc.com>

From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

# Changes since v1:

1. Add a "test_commit ..." command in test case of t5574, so we can run
   test cases 4-6 individually.

2. Improve commit logs.


# range-diff v1...v2

1:  8c85f83e66 ! 1:  210191917b t5574: test porcelain output of atomic fetch
    @@ Commit message
     
             test_must_be_empty stderr
     
    -    Refactor this test case to run it twice. The first time will be run
    -    using non-atomic fetch and the other time will be run using atomic
    -    fetch. We can see that the above assertion fails for atomic get, as
    -    shown below:
    +    But this assertion fails if using atomic fetch. Refactor this test case
    +    to use different fetch options by splitting it into three test cases.
     
    +      1. "setup for fetch porcelain output".
    +
    +      2. "fetch porcelain output": for non-atomic fetch.
    +
    +      3. "fetch porcelain output (atomic)": for atomic fetch.
    +
    +    Add new command "test_commit ..." in the first test case, so that if we
    +    run these test cases individually (--run=4-6), "git rev-parse HEAD~"
    +    command will work properly. Run the above test cases, we can find that
    +    one test case has a known breakage, as shown below:
    +
    +        ok 4 - setup for fetch porcelain output
             ok 5 - fetch porcelain output  # TODO known breakage vanished
             not ok 6 - fetch porcelain output (atomic) # TODO known breakage
     
    -    The failed test case had an error message with only the error prompt but
    +    The failed test case has an error message with only the error prompt but
         no message body, as follows:
     
             'stderr' is not empty, it contains:
    @@ Commit message
         In a later commit, we will fix this issue.
     
         Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
     
      ## t/t5574-fetch-output.sh ##
     @@ t/t5574-fetch-output.sh: test_expect_success 'fetch compact output' '
    @@ t/t5574-fetch-output.sh: test_expect_success 'fetch compact output' '
     +test_expect_success 'setup for fetch porcelain output' '
      	# Set up a bunch of references that we can use to demonstrate different
      	# kinds of flag symbols in the output format.
    ++	test_commit commit-for-porcelain-output &&
      	MAIN_OLD=$(git rev-parse HEAD) &&
    + 	git branch "fast-forward" &&
    + 	git branch "deleted-branch" &&
     @@ t/t5574-fetch-output.sh: test_expect_success 'fetch porcelain output' '
      	FORCE_UPDATED_OLD=$(git rev-parse HEAD) &&
      	git checkout main &&
2:  d3184a9d0f ! 2:  6fb83a0000 fetch: no redundant error message for atomic fetch
    @@ Commit message
         will appear at the end of do_fetch(). It was introduced in b3a804663c
         (fetch: make `--atomic` flag cover backfilling of tags, 2022-02-17).
     
    -    Instead of displaying the error message unconditionally, the final error
    -    output should follow the pattern in update-ref.c and files-backend.c as
    -    follows:
    +    In function do_fetch(), a failure message is already shown before the
    +    retcode is set, so we should not call additional error() at the end of
    +    this function.
    +
    +    We can remove the redundant error() function, because we know that
    +    the function ref_transaction_abort() never fails. While we can find a
    +    common pattern for calling ref_transaction_abort() by running command
    +    "git grep -A1 ref_transaction_abort", e.g.:
     
             if (ref_transaction_abort(transaction, &error))
                 error("abort: %s", error.buf);
     
    -    This will fix the test case "fetch porcelain output (atomic)" in t5574.
    +    We can fix this issue follow this pattern, and the test case "fetch
    +    porcelain output (atomic)" in t5574 will also be fixed. If in the future
    +    we decide that we don't need to check the return value of the function
    +    ref_transaction_abort(), this change can be fixed along with it.
     
         Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
     
      ## builtin/fetch.c ##
     @@ builtin/fetch.c: static int do_fetch(struct transport *transport,

Jiang Xin (2):
  t5574: test porcelain output of atomic fetch
  fetch: no redundant error message for atomic fetch

 builtin/fetch.c         |  4 +-
 t/t5574-fetch-output.sh | 97 ++++++++++++++++++++++++-----------------
 2 files changed, 59 insertions(+), 42 deletions(-)

-- 
2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev


^ permalink raw reply

* Re: [PATCH v2 1/4] wt-status: read HEAD and ORIG_HEAD via the refdb
From: Patrick Steinhardt @ 2023-12-14  9:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Ramsay Jones, git, Taylor Blau, Phillip Wood
In-Reply-To: <xmqqmsuennfu.fsf@gitster.g>

[-- Attachment #1: Type: text/plain, Size: 2853 bytes --]

On Wed, Dec 13, 2023 at 07:15:33AM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> >> Me neither, but once you start thinking about getting rid of the
> >> need to use one-file-per-ref filesystem, being able to maintain all
> >> refs, including the pseudo refs, in one r/w store backend, becomes a
> >> very tempting goal.  From that point of view, I do not have problem
> >> with the idea to move _all_ pseudorefs to reftable.
> >
> > Yes, we're in agreement.
> >
> >> But I do have reservations on what Patrick, and the code he
> >> inherited from Han-Wen, calls "special refs" (which is not defined
> >> in the glossary at all), namely, refs.c:is_special_ref() and its
> >> callers.
> >
> > I do not want to add "special refs" to the glossary because ultimately
> > they should go away, with two exceptions: FETCH_HEAD and MERGE_HEAD.
> > Once we're there we can of course discuss whether we want to explicitly
> > point them out in the glossary and give them a special name.
> 
> OK, I somehow got a (wrong) impression that you are very close to
> the finish line.

You mean with the reftable backend? I indeed am quite close, I've just
finished the last prerequisite ("extensions.refFormat" and related
tooling) today. I will send that patch series upstream for review once
my patches that fix repo initialization with git-clone(1) land in the
"next" branch. The current state at [1] passes CI, even though there
will of course still be bugs which aren't covered by the test suite.

So once all prerequisites that are currently in flight plus the pending
"extensions.refFormat" series have landed I will send the reftable
backend implementation in for review. If things continue to go smoothly
I expect that this may happen at the end of January/start of February.

Anyway. This patch series here is in fact already sufficient to make
reftables work with those special refs. The only thing that we require
in this context is that refs are either exclusively routed through the
filesystem, or exclusively routed through the ref API. If that property
holds then things work just fine.

But still, I do want to clean up the remaining special refs regardless
of that, even though it is not a mandatory prerequisite. I find that the
current state is just plain confusing with all these special cases, and
I'd really love for it to be simplified. Also, I think there is benefit
in having those refs in reftables because it does allow for proper
atomic updates.

> If the intention is to leave many others still in
> the "special" category (for only the reasons of inertia), with a
> vision that some selected few must remain "special" with their own
> good reasons, then I am perfectly fine.

Okay.

Patrick

[1]: https://gitlab.com/gitlab-org/git/-/merge_requests/58

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH v2] tests: adjust whitespace in chainlint expectations
From: Eric Sunshine @ 2023-12-14  8:44 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, Junio C Hamano
In-Reply-To: <aec86a15c69aa276eee4875fad208ee2fc57635a.1702542564.git.ps@pks.im>

On Thu, Dec 14, 2023 at 3:30 AM Patrick Steinhardt <ps@pks.im> wrote:
> [...]
> Instead of improving the detection logic, fix our ".expect" files so
> that we do not need any post-processing anymore. This allows us to drop
> the `-w` flag when diffing so that we can always use diff(1) now.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> diff --git a/t/Makefile b/t/Makefile
> @@ -108,15 +108,7 @@ check-chainlint:
>         } >'$(CHAINLINTTMP_SQ)'/expect && \
>         $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
>                 sed -e 's/^[1-9][0-9]* //;/^[   ]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
> -       if test -f ../GIT-BUILD-OPTIONS; then \
> -               . ../GIT-BUILD-OPTIONS; \
> -       fi && \
> -       if test -x ../git$$X; then \
> -               DIFFW="../git$$X --no-pager diff -w --no-index"; \
> -       else \
> -               DIFFW="diff -w -u"; \
> -       fi && \
> -       $$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
> +       diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual

This change feels incomplete. Yes, it "corrects" the whitespace, but
check-chainlint still applies sed to remove blank lines and strip the
line numbers from the beginning of each line. Based upon the commit
message, I had expected that all post-processing of the output would
have been eliminated and that the "expect" files would exactly mirror
the output of the linter.

As mentioned elsewhere[1], I'm not particularly in favor of this
approach, though I won't stand in the way of it either.

[1]: https://lore.kernel.org/git/CAPig+cSkuRfkR2D3JqYcbaJqj485nfD9Nq6pM=vXWB5DJenWpA@mail.gmail.com/

^ permalink raw reply

* Re: [PATCH] tests: prefer host Git to verify chainlint self-checks
From: Patrick Steinhardt @ 2023-12-14  8:40 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Junio C Hamano, git
In-Reply-To: <CAPig+cQ2-PB24n0xfcoSy_1UT-VbEZUXXJ9QbA8FBA8Vfyd6Ng@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2038 bytes --]

On Thu, Dec 14, 2023 at 03:39:17AM -0500, Eric Sunshine wrote:
> On Thu, Dec 14, 2023 at 3:13 AM Patrick Steinhardt <ps@pks.im> wrote:
> > On Wed, Dec 13, 2023 at 10:33:00PM -0500, Eric Sunshine wrote:
> > > On Wed, Dec 13, 2023 at 10:11 AM Junio C Hamano <gitster@pobox.com> wrote:
> > > > Mostly because your "differences in features supported by just-built
> > > > one and what happens to be on $PATH can cause problems" cuts both
> > > > ways [...]
> > >
> > > I sent an alternative solution[1] which should sidestep this objection.
> >
> > Thanks, I've replied to the thread. I think by now there are three
> > different ideas:
> >
> >   - Improve the logic to pick some kind of diff implementation, which is
> >     my patch series. It would need to be improved so that we also probe
> >     whether the respective Git executables actually understand the repo
> >     format so that we can fall back from the just-built Git to system's
> >     Git.
> >
> >   - Munge the whitespace of the expected results with some regexes.
> >     I like that idea better because we can avoid the git-diff(1)
> >     problem, but find that the result is somewhat hard to read.
> >
> >   - Fix the ".expect" files so that we can avoid all of these games.
> >
> > I actually like the last option most. I'll have a go at it and send this
> > third version out in a bit.
> 
> I sent a reply[1] in the other thread explaining why I'm still leaning
> toward `sed` to smooth over these minor differences rather than
> churning the "expect" files, especially since the minor differences
> are not significant to what is actually being tested. That said, I
> won't stand in the way of such a patch to "fix" the "expect" files,
> but it feels unnecessary.
> 
> [1]: https://lore.kernel.org/git/CAPig+cSkuRfkR2D3JqYcbaJqj485nfD9Nq6pM=vXWB5DJenWpA@mail.gmail.com/

Yeah, our mails indeed crossed. I personally do not mind much which of
our patches land upstream and would be happy with either.

Thanks!

Patrick

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH] tests: prefer host Git to verify chainlint self-checks
From: Eric Sunshine @ 2023-12-14  8:39 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: Junio C Hamano, git
In-Reply-To: <ZXq5GL723v4E3_IH@tanuki>

On Thu, Dec 14, 2023 at 3:13 AM Patrick Steinhardt <ps@pks.im> wrote:
> On Wed, Dec 13, 2023 at 10:33:00PM -0500, Eric Sunshine wrote:
> > On Wed, Dec 13, 2023 at 10:11 AM Junio C Hamano <gitster@pobox.com> wrote:
> > > Mostly because your "differences in features supported by just-built
> > > one and what happens to be on $PATH can cause problems" cuts both
> > > ways [...]
> >
> > I sent an alternative solution[1] which should sidestep this objection.
>
> Thanks, I've replied to the thread. I think by now there are three
> different ideas:
>
>   - Improve the logic to pick some kind of diff implementation, which is
>     my patch series. It would need to be improved so that we also probe
>     whether the respective Git executables actually understand the repo
>     format so that we can fall back from the just-built Git to system's
>     Git.
>
>   - Munge the whitespace of the expected results with some regexes.
>     I like that idea better because we can avoid the git-diff(1)
>     problem, but find that the result is somewhat hard to read.
>
>   - Fix the ".expect" files so that we can avoid all of these games.
>
> I actually like the last option most. I'll have a go at it and send this
> third version out in a bit.

I sent a reply[1] in the other thread explaining why I'm still leaning
toward `sed` to smooth over these minor differences rather than
churning the "expect" files, especially since the minor differences
are not significant to what is actually being tested. That said, I
won't stand in the way of such a patch to "fix" the "expect" files,
but it feels unnecessary.

[1]: https://lore.kernel.org/git/CAPig+cSkuRfkR2D3JqYcbaJqj485nfD9Nq6pM=vXWB5DJenWpA@mail.gmail.com/

^ permalink raw reply

* Re: [PATCH] tests: drop dependency on `git diff` in check-chainlint
From: Eric Sunshine @ 2023-12-14  8:31 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: Eric Sunshine, git, Junio C Hamano
In-Reply-To: <ZXq3YdK2RSKF3npE@tanuki>

On Thu, Dec 14, 2023 at 3:05 AM Patrick Steinhardt <ps@pks.im> wrote:
> On Wed, Dec 13, 2023 at 10:22:48PM -0500, Eric Sunshine wrote:
> > This is an alternative solution to the issue Patrick's patch[1]
> > addresses. Hopefully, this approach should avoid the sort of push-back
> > Patrick's patch received[2].
> >
> > By the way, I'm somewhat surprised that this issue crops up at all
> > considering that --no-index is being used with git-diff. As such, I
> > would have thought that the local repository's format would not have
> > been interrogated at all. If that's a bug in `git diff --no-index`, then
> > fixing that could be considered yet another alternative solution to the
> > issue raised here.
>
> This strongly reminds me of the thread at [1], where a similar issue was
> discussed for git-grep(1). Quoting Junio:
>
> > I actually do not think these "we are allowing Git tools to be used
> > on random garbage" is a good idea to begin with X-<.  If we invented
> > something nice for our variant in "git grep" and wish we can use it
> > outside the repository, contributing the feature to implementations
> > of "grep" would have been the right way to move forward, instead of
> > contaminating the codebase with things that are not related to Git.
>
> So this might not be the best way to go.

I recall Junio mentioning that, and I'm fine with the conclusion that
"fixing" --no-index is counter to the project's goals.

> > -                     sed -e '/^[     ]*$$/d' chainlint/$$i.expect; \
> > +                     sed -e 's/[     ][      ]*/ /g;/^ *$$/d;s/^ //;s/ $$//;s/\([<>|();&]\) /\1/g;s/ \([<>|();&]\)/\1/g' chainlint/$$i.expect; \
> >
> > The sed expressions for normalizing whitespace prior to `diff` may look
> > a bit hairy, but they are simple enough in concept:
> >
> > * collapse runs of whitespace to a single SP
> > * drop blank lines (this step is not new)
> > * fold out possible SP at beginning and end of each line
> > * fold out SP surrounding common punctuation characters used in shell
> >   scripts, such as `>`, `|`, `;`, etc.
>
> These sed expressions do look hairy indeed. I have to wonder: all that
> we're doing here is to munge the expected files we already have in our
> tree. Can't we fix those to look exactly like the actual results instead
> and then avoid any kind of post processing altogether? If I understand
> correctly the only reason we do this post processing is because the
> original implementation of the chainlinter produced slightly different
> whitespace.

Yes and no. It's not just whitespace.

I did strongly consider submitting patches to fix all the whitespace
differences in the "expect" files when chainlint.pl replaced
chainlint.sed, but I particularly didn't want to plague the mailing
list with such noise. It's really just unnecessary churn since it's so
easy to work around it with minor sed magic.

And time tells me that that was probably the correct decision since
the output of chainlint.pl has changed multiple times. Even the output
of chainlint.sed wasn't necessarily stable[1]. Then, of course
chainlint.pl replaced[2] chainlint.sed. The original implementation or
chainlint.pl just dumped out the parsed token stream, but [3] improved
it to preserve the original formatting of the test snippet, and [4]
annotated the output with line numbers of the original test snippet.
Had those changes been accompanied by extra patches to "fix" the
"expect" files to suit, it would have been just that much more noise
both in terms of patches to review and in terms of churn in the actual
history.

And, who knows, the output of chainlint.pl might change/improve again
some day. So, I still favor using sed to smooth over these minor
differences rather than "fixing" the "expect" file repeatedly to
adjust them for changes which are not significant to what is actually
being tested.

[1]: d73f5cfa89 (chainlint.sed: stop splitting "(..." into separate
lines "(" and "...", 2021-12-13)
[2]: d00113ec34 (t/Makefile: apply chainlint.pl to existing
self-tests, 2022-09-01)
[3]: 73c768dae9 (chainlint: annotate original test definition rather
than token stream, 2022-11-08)
[4]: 48d69d8f2f (chainlint: prefix annotated test definition with line
numbers, 2022-11-11)

^ permalink raw reply

* [PATCH v2] tests: adjust whitespace in chainlint expectations
From: Patrick Steinhardt @ 2023-12-14  8:30 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Eric Sunshine
In-Reply-To: <4112adbe467c14a8f22a87ea41aa4705f8760cf6.1702380646.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 14057 bytes --]

The "check-chainlint" target runs automatically when running tests and
performs self-checks to verify that the chainlinter itself produces the
expected output. Originally, the chainlinter was implemented via sed,
but the infrastructure has been rewritten in fb41727b7e (t: retire
unused chainlint.sed, 2022-09-01) to use a Perl script instead.

The rewrite caused some slight whitespace changes in the output that are
ultimately not of much importance. In order to be able to assert that
the actual chainlinter errors match our expectations we thus have to
ignore whitespace characters when diffing them. As the `-w` flag is not
in POSIX we try to use `git diff -w --no-index` before we fall back to
`diff -w -u`.

To accomodate for cases where the host system has no Git installation we
use the locally-compiled version of Git. This can result in problems
though when the Git project's repository is using extensions that the
locally-compiled version of Git doesn't understand. It will refuse to
run and thus cause the checks to fail.

Instead of improving the detection logic, fix our ".expect" files so
that we do not need any post-processing anymore. This allows us to drop
the `-w` flag when diffing so that we can always use diff(1) now.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 t/Makefile                                    | 10 +-------
 t/chainlint/blank-line-before-esac.expect     |  8 +++----
 t/chainlint/block.expect                      |  4 ++--
 t/chainlint/chain-break-background.expect     |  4 ++--
 t/chainlint/chain-break-return-exit.expect    | 14 +++++------
 t/chainlint/chain-break-status.expect         |  4 ++--
 t/chainlint/chained-subshell.expect           |  4 ++--
 .../command-substitution-subsubshell.expect   |  2 +-
 t/chainlint/dqstring-line-splice.expect       |  4 ++--
 t/chainlint/dqstring-no-interpolate.expect    |  4 ++--
 t/chainlint/empty-here-doc.expect             |  4 ++--
 t/chainlint/exclamation.expect                |  2 +-
 t/chainlint/for-loop-abbreviated.expect       |  2 +-
 t/chainlint/function.expect                   |  4 ++--
 t/chainlint/here-doc.expect                   |  4 ++--
 t/chainlint/loop-detect-status.expect         | 20 ++++++++--------
 t/chainlint/nested-loop-detect-failure.expect | 24 +++++++++----------
 t/chainlint/subshell-here-doc.expect          |  4 ++--
 t/chainlint/token-pasting.expect              | 14 +++++------
 19 files changed, 64 insertions(+), 72 deletions(-)

diff --git a/t/Makefile b/t/Makefile
index 225aaf78ed..3a5d81b7fe 100644
--- a/t/Makefile
+++ b/t/Makefile
@@ -108,15 +108,7 @@ check-chainlint:
 	} >'$(CHAINLINTTMP_SQ)'/expect && \
 	$(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
 		sed -e 's/^[1-9][0-9]* //;/^[ 	]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
-	if test -f ../GIT-BUILD-OPTIONS; then \
-		. ../GIT-BUILD-OPTIONS; \
-	fi && \
-	if test -x ../git$$X; then \
-		DIFFW="../git$$X --no-pager diff -w --no-index"; \
-	else \
-		DIFFW="diff -w -u"; \
-	fi && \
-	$$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
+	diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
 
 test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \
 	test-lint-filenames
diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect
index 48ed4eb124..056e03003d 100644
--- a/t/chainlint/blank-line-before-esac.expect
+++ b/t/chainlint/blank-line-before-esac.expect
@@ -1,11 +1,11 @@
-test_done ( ) {
+test_done () {
 	case "$test_failure" in
-	0 )
+	0)
 		test_at_end_hook_
 
 		exit 0 ;;
 
-	* )
+	*)
 		if test $test_external_has_tap -eq 0
 		then
 			say_color error "# failed $test_failure among $msg"
@@ -14,5 +14,5 @@ test_done ( ) {
 
 		exit 1 ;;
 
-		esac
+	esac
 }
diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect
index a3bcea492a..1c87326364 100644
--- a/t/chainlint/block.expect
+++ b/t/chainlint/block.expect
@@ -12,9 +12,9 @@
 ) &&
 
 {
-	echo a ; ?!AMP?! echo b
+	echo a; ?!AMP?! echo b
 } &&
-{ echo a ; ?!AMP?! echo b ; } &&
+{ echo a; ?!AMP?! echo b; } &&
 
 {
 	echo "${var}9" &&
diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect
index 28f9114f42..20d0bb5333 100644
--- a/t/chainlint/chain-break-background.expect
+++ b/t/chainlint/chain-break-background.expect
@@ -1,9 +1,9 @@
 JGIT_DAEMON_PID= &&
 git init --bare empty.git &&
-> empty.git/git-daemon-export-ok &&
+>empty.git/git-daemon-export-ok &&
 mkfifo jgit_daemon_output &&
 {
-	jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output &
+	jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output &
 	JGIT_DAEMON_PID=$!
 } &&
 test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git
diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect
index 1732d221c3..4cd18e2edf 100644
--- a/t/chainlint/chain-break-return-exit.expect
+++ b/t/chainlint/chain-break-return-exit.expect
@@ -1,16 +1,16 @@
 case "$(git ls-files)" in
-one ) echo pass one ;;
-* ) echo bad one ; return 1 ;;
+one) echo pass one ;;
+*) echo bad one; return 1 ;;
 esac &&
 (
 	case "$(git ls-files)" in
-	two ) echo pass two ;;
-	* ) echo bad two ; exit 1 ;;
-esac
+	two) echo pass two ;;
+	*) echo bad two; exit 1 ;;
+	esac
 ) &&
 case "$(git ls-files)" in
-dir/two"$LF"one ) echo pass both ;;
-* ) echo bad ; return 1 ;;
+dir/two"$LF"one) echo pass both ;;
+*) echo bad; return 1 ;;
 esac &&
 
 for i in 1 2 3 4 ; do
diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect
index f4bada9463..e6b3b2193e 100644
--- a/t/chainlint/chain-break-status.expect
+++ b/t/chainlint/chain-break-status.expect
@@ -1,7 +1,7 @@
-OUT=$(( ( large_git ; echo $? 1 >& 3 ) | : ) 3 >& 1) &&
+OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) &&
 test_match_signal 13 "$OUT" &&
 
-{ test-tool sigchain > actual ; ret=$? ; } &&
+{ test-tool sigchain >actual; ret=$?; } &&
 {
 	test_match_signal 15 "$ret" ||
 	test "$ret" = 3
diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect
index af0369d328..83810ea7ec 100644
--- a/t/chainlint/chained-subshell.expect
+++ b/t/chainlint/chained-subshell.expect
@@ -4,7 +4,7 @@ mkdir sub && (
 	nuff said
 ) &&
 
-cut "-d " -f actual | ( read s1 s2 s3 &&
+cut "-d " -f actual | (read s1 s2 s3 &&
 test -f $s1 ?!AMP?!
 test $(cat $s2) = tree2path1 &&
-test $(cat $s3) = tree3path1 )
+test $(cat $s3) = tree3path1)
diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect
index ab2f79e845..ec42f2c30c 100644
--- a/t/chainlint/command-substitution-subsubshell.expect
+++ b/t/chainlint/command-substitution-subsubshell.expect
@@ -1,2 +1,2 @@
-OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) &&
+OUT=$( ((large_git 1>&3) | :) 3>&1 ) &&
 test_match_signal 13 "$OUT"
diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect
index bf9ced60d4..f8fa3bc969 100644
--- a/t/chainlint/dqstring-line-splice.expect
+++ b/t/chainlint/dqstring-line-splice.expect
@@ -1,3 +1,3 @@
-echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect &&
-test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual &&
+echo 'fatal: reword option of --fixup is mutually exclusive with'	'--patch/--interactive/--all/--include/--only' >expect &&
+test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual &&
 test_cmp expect actual
diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect
index 10724987a5..2c1851a3c3 100644
--- a/t/chainlint/dqstring-no-interpolate.expect
+++ b/t/chainlint/dqstring-no-interpolate.expect
@@ -6,6 +6,6 @@ grep "^\.git$" output.txt &&
 (
 	cd client$version &&
 	GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. $(cat ../input)
-) > output &&
-	cut -d ' ' -f 2 < output | sort > actual &&
+) >output &&
+	cut -d ' ' -f 2 <output | sort >actual &&
 	test_cmp expect actual
diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect
index e8733c97c6..8507721192 100644
--- a/t/chainlint/empty-here-doc.expect
+++ b/t/chainlint/empty-here-doc.expect
@@ -1,4 +1,4 @@
-git ls-tree $tree path > current &&
-cat > expected <<\EOF &&
+git ls-tree $tree path >current &&
+cat >expected <<\EOF &&
 EOF
 test_output
diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect
index 2d961a58c6..765a35bb4c 100644
--- a/t/chainlint/exclamation.expect
+++ b/t/chainlint/exclamation.expect
@@ -1,4 +1,4 @@
-if ! condition ; then echo nope ; else yep ; fi &&
+if ! condition; then echo nope; else yep; fi &&
 test_prerequisite !MINGW &&
 mail uucp!address &&
 echo !whatever!
diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect
index a21007a63f..02c0d15cca 100644
--- a/t/chainlint/for-loop-abbreviated.expect
+++ b/t/chainlint/for-loop-abbreviated.expect
@@ -1,5 +1,5 @@
 for it
 do
-	path=$(expr "$it" : ( [^:]*) ) &&
+	path=$(expr "$it" : ([^:]*)) &&
 	git update-index --add "$path" || exit
 done
diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect
index a14388e6b9..dd7c997a3c 100644
--- a/t/chainlint/function.expect
+++ b/t/chainlint/function.expect
@@ -1,8 +1,8 @@
-sha1_file ( ) {
+sha1_file() {
 	echo "$*" | sed "s#..#.git/objects/&/#"
 } &&
 
-remove_object ( ) {
+remove_object() {
 	file=$(sha1_file "$*") &&
 	test -e "$file" ?!AMP?!
 	rm -f "$file"
diff --git a/t/chainlint/here-doc.expect b/t/chainlint/here-doc.expect
index 1df3f78282..91b961242a 100644
--- a/t/chainlint/here-doc.expect
+++ b/t/chainlint/here-doc.expect
@@ -1,6 +1,6 @@
 boodle wobba \
-	gorgo snoot \
-	wafta snurb <<EOF &&
+       gorgo snoot \
+       wafta snurb <<EOF &&
 quoth the raven,
 nevermore...
 EOF
diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect
index 24da9e86d5..7ce3a34806 100644
--- a/t/chainlint/loop-detect-status.expect
+++ b/t/chainlint/loop-detect-status.expect
@@ -1,18 +1,18 @@
-( while test $i -le $blobcount
-do
-	printf "Generating blob $i/$blobcount\r" >& 2 &&
+(while test $i -le $blobcount
+ do
+	printf "Generating blob $i/$blobcount\r" >&2 &&
 	printf "blob\nmark :$i\ndata $blobsize\n" &&
 	#test-tool genrandom $i $blobsize &&
 	printf "%-${blobsize}s" $i &&
 	echo "M 100644 :$i $i" >> commit &&
 	i=$(($i+1)) ||
 	echo $? > exit-status
-done &&
-echo "commit refs/heads/main" &&
-echo "author A U Thor <author@email.com> 123456789 +0000" &&
-echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
-echo "data 5" &&
-echo ">2gb" &&
-cat commit ) |
+ done &&
+ echo "commit refs/heads/main" &&
+ echo "author A U Thor <author@email.com> 123456789 +0000" &&
+ echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
+ echo "data 5" &&
+ echo ">2gb" &&
+ cat commit) |
 git fast-import --big-file-threshold=2 &&
 test ! -f exit-status
diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect
index 4793a0e8e1..3461df40e5 100644
--- a/t/chainlint/nested-loop-detect-failure.expect
+++ b/t/chainlint/nested-loop-detect-failure.expect
@@ -1,31 +1,31 @@
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
 do
-	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	for j in 0 1 2 3 4 5 6 7 8 9;
 	do
-		echo "$i$j" > "path$i$j" ?!LOOP?!
+		echo "$i$j" >"path$i$j" ?!LOOP?!
 	done ?!LOOP?!
 done &&
 
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
 do
-	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	for j in 0 1 2 3 4 5 6 7 8 9;
 	do
-		echo "$i$j" > "path$i$j" || return 1
+		echo "$i$j" >"path$i$j" || return 1
 	done
 done &&
 
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
 do
-	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	for j in 0 1 2 3 4 5 6 7 8 9;
 	do
-		echo "$i$j" > "path$i$j" ?!LOOP?!
+		echo "$i$j" >"path$i$j" ?!LOOP?!
 	done || return 1
 done &&
 
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
 do
-	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	for j in 0 1 2 3 4 5 6 7 8 9;
 	do
-		echo "$i$j" > "path$i$j" || return 1
+		echo "$i$j" >"path$i$j" || return 1
 	done || return 1
 done
diff --git a/t/chainlint/subshell-here-doc.expect b/t/chainlint/subshell-here-doc.expect
index 52789278d1..75d6f607e2 100644
--- a/t/chainlint/subshell-here-doc.expect
+++ b/t/chainlint/subshell-here-doc.expect
@@ -1,7 +1,7 @@
 (
 	echo wobba \
-		gorgo snoot \
-		wafta snurb <<-EOF &&
+	       gorgo snoot \
+	       wafta snurb <<-EOF &&
 	quoth the raven,
 	nevermore...
 	EOF
diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect
index 342360bcd0..6a387917a7 100644
--- a/t/chainlint/token-pasting.expect
+++ b/t/chainlint/token-pasting.expect
@@ -4,22 +4,22 @@ git config filter.rot13.clean ./rot13.sh &&
 {
     echo "*.t filter=rot13" ?!AMP?!
     echo "*.i ident"
-} > .gitattributes &&
+} >.gitattributes &&
 
 {
     echo a b c d e f g h i j k l m ?!AMP?!
     echo n o p q r s t u v w x y z ?!AMP?!
     echo '$Id$'
-} > test &&
-cat test > test.t &&
-cat test > test.o &&
-cat test > test.i &&
+} >test &&
+cat test >test.t &&
+cat test >test.o &&
+cat test >test.i &&
 git add test test.t test.i &&
 rm -f test test.t test.i &&
 git checkout -- test test.t test.i &&
 
-echo "content-test2" > test2.o &&
-echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?!
+echo "content-test2" >test2.o &&
+echo "content-test3 - filename with special characters" >"test3 'sq',$x=.o" ?!AMP?!
 
 downstream_url_for_sed=$(
 	printf "%sn" "$downstream_url" |
-- 
2.43.GIT


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* Re: [PATCH] tests: prefer host Git to verify chainlint self-checks
From: Patrick Steinhardt @ 2023-12-14  8:13 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Junio C Hamano, git
In-Reply-To: <CAPig+cRc2hW_xhJRPJmEVYik71zWLDQ_EFjBFw095OgPGYrWGg@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1642 bytes --]

On Wed, Dec 13, 2023 at 10:33:00PM -0500, Eric Sunshine wrote:
> On Wed, Dec 13, 2023 at 10:11 AM Junio C Hamano <gitster@pobox.com> wrote:
> > Patrick Steinhardt <ps@pks.im> writes:
> > >> I do not think "prefer host Git" is necessarily a good idea; falling
> > >> back to use host Git is perfectly fine, of course.
> > >
> > > Why is that, though?
> >
> > Mostly because your "differences in features supported by just-built
> > one and what happens to be on $PATH can cause problems" cuts both
> > ways [...]
> 
> I sent an alternative solution[1] which should sidestep this objection.
> 
> As usual, I forgot to use --in-reply-to=<this-thread> when sending the
> patch, even though I reminded myself to use it only a minute or so
> earlier. Sorry.
> 
> [1]: https://lore.kernel.org/git/20231214032248.1615-1-ericsunshine@charter.net/

Thanks, I've replied to the thread. I think by now there are three
different ideas:

  - Improve the logic to pick some kind of diff implementation, which is
    my patch series. It would need to be improved so that we also probe
    whether the respective Git executables actually understand the repo
    format so that we can fall back from the just-built Git to system's
    Git.

  - Munge the whitespace of the expected results with some regexes.
    I like that idea better because we can avoid the git-diff(1)
    problem, but find that the result is somewhat hard to read.

  - Fix the ".expect" files so that we can avoid all of these games.

I actually like the last option most. I'll have a go at it and send this
third version out in a bit.

Patrick

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH] tests: drop dependency on `git diff` in check-chainlint
From: Patrick Steinhardt @ 2023-12-14  8:05 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: git, Eric Sunshine, Junio C Hamano, Eric Sunshine
In-Reply-To: <20231214032248.1615-1-ericsunshine@charter.net>

[-- Attachment #1: Type: text/plain, Size: 4485 bytes --]

On Wed, Dec 13, 2023 at 10:22:48PM -0500, Eric Sunshine wrote:
> From: Eric Sunshine <sunshine@sunshineco.com>
> 
> The "check-chainlint" target runs automatically when running tests and
> performs self-checks to verify that the chainlinter itself produces the
> expected output. Originally, the chainlinter was implemented via sed,
> but the infrastructure has been rewritten in fb41727b7e (t: retire
> unused chainlint.sed, 2022-09-01) to use a Perl script instead.
> 
> The rewrite caused some slight whitespace changes in the output that are
> ultimately not of much importance. In order to be able to assert that
> the actual chainlinter errors match our expectations we thus have to
> ignore whitespace characters when diffing them. As the `-w` flag is not
> in POSIX we try to use `git diff -w --no-index` before we fall back to
> non-standard `diff -w -u`.
> 
> To accommodate for cases where the host system has no Git installation
> we use the locally-compiled version of Git. This can result in problems
> though when the Git project's repository is using extensions that the
> locally-compiled version of Git doesn't understand, in which case `git`
> may refuse to run and thus cause the checks to fail.
> 
> Work around this issue by normalizing whitespace via sed before invoking
> diff, which allows any platform diff implementation to be used, thus
> eliminating the dependency upon `git diff` and the non-POSIX `-w` flag.
> 
> Reported-by: Patrick Steinhardt <ps@pks.im>
> Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
> ---
> 
> This is an alternative solution to the issue Patrick's patch[1]
> addresses. Hopefully, this approach should avoid the sort of push-back
> Patrick's patch received[2].

Thanks for chiming in!

> I shamelessly stole most of Patrick's commit message.
> 
> The sed expressions for normalizing whitespace prior to `diff` may look
> a bit hairy, but they are simple enough in concept:
> 
> * collapse runs of whitespace to a single SP
> * drop blank lines (this step is not new)
> * fold out possible SP at beginning and end of each line
> * fold out SP surrounding common punctuation characters used in shell
>   scripts, such as `>`, `|`, `;`, etc.
> 
> By the way, I'm somewhat surprised that this issue crops up at all
> considering that --no-index is being used with git-diff. As such, I
> would have thought that the local repository's format would not have
> been interrogated at all. If that's a bug in `git diff --no-index`, then
> fixing that could be considered yet another alternative solution to the
> issue raised here.

This strongly reminds me of the thread at [1], where a similar issue was
discussed for git-grep(1). Quoting Junio: 

> I actually do not think these "we are allowing Git tools to be used
> on random garbage" is a good idea to begin with X-<.  If we invented
> something nice for our variant in "git grep" and wish we can use it
> outside the repository, contributing the feature to implementations
> of "grep" would have been the right way to move forward, instead of
> contaminating the codebase with things that are not related to Git.

So this might not be the best way to go.

> [1]: https://lore.kernel.org/git/4112adbe467c14a8f22a87ea41aa4705f8760cf6.1702380646.git.ps@pks.im/
> [2]: https://lore.kernel.org/git/xmqqr0jqnnmn.fsf@gitster.g/
> 
>  t/Makefile | 14 +++-----------
>  1 file changed, 3 insertions(+), 11 deletions(-)
> 
> diff --git a/t/Makefile b/t/Makefile
> index 225aaf78ed..656ff10afa 100644
> --- a/t/Makefile
> +++ b/t/Makefile
> @@ -103,20 +103,12 @@ check-chainlint:
>  		echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \
>  		for i in $(CHAINLINTTESTS); do \
>  			echo "# chainlint: $$i" && \
> -			sed -e '/^[ 	]*$$/d' chainlint/$$i.expect; \
> +			sed -e 's/[ 	][ 	]*/ /g;/^ *$$/d;s/^ //;s/ $$//;s/\([<>|();&]\) /\1/g;s/ \([<>|();&]\)/\1/g' chainlint/$$i.expect; \

These sed expressions do look hairy indeed. I have to wonder: all that
we're doing here is to munge the expected files we already have in our
tree. Can't we fix those to look exactly like the actual results instead
and then avoid any kind of post processing altogether? If I understand
correctly the only reason we do this post processing is because the
original implementation of the chainlinter produced slightly different
whitespace.

Patrick

[1]: https://lore.kernel.org/git/xmqq7cnnpy3z.fsf@gitster.g/

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH] mailinfo: fix out-of-bounds memory reads in unquote_quoted_pair()
From: Patrick Steinhardt @ 2023-12-14  7:41 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Taylor Blau, Carlos Andrés Ramírez Cataño
In-Reply-To: <20231213082027.GB1684525@coredump.intra.peff.net>

[-- Attachment #1: Type: text/plain, Size: 2643 bytes --]

On Wed, Dec 13, 2023 at 03:20:27AM -0500, Jeff King wrote:
> On Wed, Dec 13, 2023 at 08:07:21AM +0100, Patrick Steinhardt wrote:
[snip]
> > Another thing I was wondering about is the recursive nature of these
> > functions, and whether it can lead to similar issues like we recently
> > had when parsing revisions with stack exhaustion. I _think_ this should
> > not be much of a problem in this case though, as we're talking about
> > mail headers here. While the length of header values isn't restricted
> > per se (the line length is restricted to 1000, but with Comment Folding
> > Whitespace values can span multiple lines), but mail provdiers will sure
> > clamp down on mails with a "From:" header that is megabytes in size.
> 
> It's just unquote_comment() that is recursive, but yeah, there is
> nothing to stop it from recursing forever on "((((((...". The stack
> requirements are pretty small, though. I needed between 2^17 and 2^18
> bytes to segfault on my machine using:
> 
>   perl -e 'print "From: ", "(" x 2**18;' |
>   git mailinfo /dev/null /dev/null
> 
> Absurdly big for an email, but maybe within the realm of possibility? I
> think it might be possible to drop the recursion and just use a depth
> counter, like this:

It's definitely not as large as I'd have expected it to be, we're only
talking about kilobytes of data. Feels like it might be in the realm of
possibility to get transferred by a mail provider.

> diff --git a/mailinfo.c b/mailinfo.c
> index 737b9e5e13..db236f9f9f 100644
> --- a/mailinfo.c
> +++ b/mailinfo.c
> @@ -59,6 +59,7 @@ static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
>  static const char *unquote_comment(struct strbuf *outbuf, const char *in)
>  {
>  	int take_next_literally = 0;
> +	int depth = 1;
>  
>  	strbuf_addch(outbuf, '(');
>  
> @@ -72,11 +73,14 @@ static const char *unquote_comment(struct strbuf *outbuf, const char *in)
>  				take_next_literally = 1;
>  				continue;
>  			case '(':
> -				in = unquote_comment(outbuf, in);
> +				strbuf_addch(outbuf, '(');
> +				depth++;
>  				continue;
>  			case ')':
>  				strbuf_addch(outbuf, ')');
> -				return in;
> +				if (!--depth)
> +					return in;
> +				continue;
>  			}
>  		}
>  
> I doubt it's a big deal either way, but if it's that easy to do it might
> be worth it.

Isn't this only protecting against unbalanced braces? That might be a
sensible check to do regardless, but does it protect against recursion
blowing the stack if you just happen to have many opening braces?

Might be I'm missing something.

Patrick

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox