* [PATCH 1/3] submodule-config.h: move check_submodule_url
2024-01-09 17:53 [PATCH 0/3] Strengthen fsck checks for submodule URLs Victoria Dye via GitGitGadget
@ 2024-01-09 17:53 ` Victoria Dye via GitGitGadget
2024-01-09 17:53 ` [PATCH 2/3] t7450: test submodule urls Victoria Dye via GitGitGadget
` (3 subsequent siblings)
4 siblings, 0 replies; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-09 17:53 UTC (permalink / raw)
To: git; +Cc: Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
Move 'check_submodule_url' out of 'fsck.c' and into 'submodule-config.h' as
a public method, similar to 'check_submodule_name'. With the function now
accessible outside of 'fsck', it can be used in a later commit to extend
'test-tool submodule' to check the validity of submodule URLs as it does
with names in the 'check-name' subcommand.
Other than its location, no changes are made to 'check_submodule_url' in
this patch.
Signed-off-by: Victoria Dye <vdye@github.com>
---
fsck.c | 133 --------------------------------------------
submodule-config.c | 134 +++++++++++++++++++++++++++++++++++++++++++++
submodule-config.h | 3 +
3 files changed, 137 insertions(+), 133 deletions(-)
diff --git a/fsck.c b/fsck.c
index 1ad02fcdfab..8ded0a473a4 100644
--- a/fsck.c
+++ b/fsck.c
@@ -20,7 +20,6 @@
#include "packfile.h"
#include "submodule-config.h"
#include "config.h"
-#include "credential.h"
#include "help.h"
static ssize_t max_tree_entry_len = 4096;
@@ -1047,138 +1046,6 @@ done:
return ret;
}
-static int starts_with_dot_slash(const char *const path)
-{
- return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
- PATH_MATCH_XPLATFORM);
-}
-
-static int starts_with_dot_dot_slash(const char *const path)
-{
- return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
- PATH_MATCH_XPLATFORM);
-}
-
-static int submodule_url_is_relative(const char *url)
-{
- return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
-}
-
-/*
- * Count directory components that a relative submodule URL should chop
- * from the remote_url it is to be resolved against.
- *
- * In other words, this counts "../" components at the start of a
- * submodule URL.
- *
- * Returns the number of directory components to chop and writes a
- * pointer to the next character of url after all leading "./" and
- * "../" components to out.
- */
-static int count_leading_dotdots(const char *url, const char **out)
-{
- int result = 0;
- while (1) {
- if (starts_with_dot_dot_slash(url)) {
- result++;
- url += strlen("../");
- continue;
- }
- if (starts_with_dot_slash(url)) {
- url += strlen("./");
- continue;
- }
- *out = url;
- return result;
- }
-}
-/*
- * Check whether a transport is implemented by git-remote-curl.
- *
- * If it is, returns 1 and writes the URL that would be passed to
- * git-remote-curl to the "out" parameter.
- *
- * Otherwise, returns 0 and leaves "out" untouched.
- *
- * Examples:
- * http::https://example.com/repo.git -> 1, https://example.com/repo.git
- * https://example.com/repo.git -> 1, https://example.com/repo.git
- * git://example.com/repo.git -> 0
- *
- * This is for use in checking for previously exploitable bugs that
- * required a submodule URL to be passed to git-remote-curl.
- */
-static int url_to_curl_url(const char *url, const char **out)
-{
- /*
- * We don't need to check for case-aliases, "http.exe", and so
- * on because in the default configuration, is_transport_allowed
- * prevents URLs with those schemes from being cloned
- * automatically.
- */
- if (skip_prefix(url, "http::", out) ||
- skip_prefix(url, "https::", out) ||
- skip_prefix(url, "ftp::", out) ||
- skip_prefix(url, "ftps::", out))
- return 1;
- if (starts_with(url, "http://") ||
- starts_with(url, "https://") ||
- starts_with(url, "ftp://") ||
- starts_with(url, "ftps://")) {
- *out = url;
- return 1;
- }
- return 0;
-}
-
-static int check_submodule_url(const char *url)
-{
- const char *curl_url;
-
- if (looks_like_command_line_option(url))
- return -1;
-
- if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
- char *decoded;
- const char *next;
- int has_nl;
-
- /*
- * This could be appended to an http URL and url-decoded;
- * check for malicious characters.
- */
- decoded = url_decode(url);
- has_nl = !!strchr(decoded, '\n');
-
- free(decoded);
- if (has_nl)
- return -1;
-
- /*
- * URLs which escape their root via "../" can overwrite
- * the host field and previous components, resolving to
- * URLs like https::example.com/submodule.git and
- * https:///example.com/submodule.git that were
- * susceptible to CVE-2020-11008.
- */
- if (count_leading_dotdots(url, &next) > 0 &&
- (*next == ':' || *next == '/'))
- return -1;
- }
-
- else if (url_to_curl_url(url, &curl_url)) {
- struct credential c = CREDENTIAL_INIT;
- int ret = 0;
- if (credential_from_url_gently(&c, curl_url, 1) ||
- !*c.host)
- ret = -1;
- credential_clear(&c);
- return ret;
- }
-
- return 0;
-}
-
struct fsck_gitmodules_data {
const struct object_id *oid;
struct fsck_options *options;
diff --git a/submodule-config.c b/submodule-config.c
index f4dd482abc9..3b295e9f89c 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -14,6 +14,8 @@
#include "parse-options.h"
#include "thread-utils.h"
#include "tree-walk.h"
+#include "url.h"
+#include "credential.h"
/*
* submodule cache lookup structure
@@ -228,6 +230,138 @@ in_component:
return 0;
}
+static int starts_with_dot_slash(const char *const path)
+{
+ return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
+ PATH_MATCH_XPLATFORM);
+}
+
+static int starts_with_dot_dot_slash(const char *const path)
+{
+ return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
+ PATH_MATCH_XPLATFORM);
+}
+
+static int submodule_url_is_relative(const char *url)
+{
+ return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
+}
+
+/*
+ * Count directory components that a relative submodule URL should chop
+ * from the remote_url it is to be resolved against.
+ *
+ * In other words, this counts "../" components at the start of a
+ * submodule URL.
+ *
+ * Returns the number of directory components to chop and writes a
+ * pointer to the next character of url after all leading "./" and
+ * "../" components to out.
+ */
+static int count_leading_dotdots(const char *url, const char **out)
+{
+ int result = 0;
+ while (1) {
+ if (starts_with_dot_dot_slash(url)) {
+ result++;
+ url += strlen("../");
+ continue;
+ }
+ if (starts_with_dot_slash(url)) {
+ url += strlen("./");
+ continue;
+ }
+ *out = url;
+ return result;
+ }
+}
+/*
+ * Check whether a transport is implemented by git-remote-curl.
+ *
+ * If it is, returns 1 and writes the URL that would be passed to
+ * git-remote-curl to the "out" parameter.
+ *
+ * Otherwise, returns 0 and leaves "out" untouched.
+ *
+ * Examples:
+ * http::https://example.com/repo.git -> 1, https://example.com/repo.git
+ * https://example.com/repo.git -> 1, https://example.com/repo.git
+ * git://example.com/repo.git -> 0
+ *
+ * This is for use in checking for previously exploitable bugs that
+ * required a submodule URL to be passed to git-remote-curl.
+ */
+static int url_to_curl_url(const char *url, const char **out)
+{
+ /*
+ * We don't need to check for case-aliases, "http.exe", and so
+ * on because in the default configuration, is_transport_allowed
+ * prevents URLs with those schemes from being cloned
+ * automatically.
+ */
+ if (skip_prefix(url, "http::", out) ||
+ skip_prefix(url, "https::", out) ||
+ skip_prefix(url, "ftp::", out) ||
+ skip_prefix(url, "ftps::", out))
+ return 1;
+ if (starts_with(url, "http://") ||
+ starts_with(url, "https://") ||
+ starts_with(url, "ftp://") ||
+ starts_with(url, "ftps://")) {
+ *out = url;
+ return 1;
+ }
+ return 0;
+}
+
+int check_submodule_url(const char *url)
+{
+ const char *curl_url;
+
+ if (looks_like_command_line_option(url))
+ return -1;
+
+ if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
+ char *decoded;
+ const char *next;
+ int has_nl;
+
+ /*
+ * This could be appended to an http URL and url-decoded;
+ * check for malicious characters.
+ */
+ decoded = url_decode(url);
+ has_nl = !!strchr(decoded, '\n');
+
+ free(decoded);
+ if (has_nl)
+ return -1;
+
+ /*
+ * URLs which escape their root via "../" can overwrite
+ * the host field and previous components, resolving to
+ * URLs like https::example.com/submodule.git and
+ * https:///example.com/submodule.git that were
+ * susceptible to CVE-2020-11008.
+ */
+ if (count_leading_dotdots(url, &next) > 0 &&
+ (*next == ':' || *next == '/'))
+ return -1;
+ }
+
+ else if (url_to_curl_url(url, &curl_url)) {
+ struct credential c = CREDENTIAL_INIT;
+ int ret = 0;
+ if (credential_from_url_gently(&c, curl_url, 1) ||
+ !*c.host)
+ ret = -1;
+ credential_clear(&c);
+ return ret;
+ }
+
+ return 0;
+}
+
static int name_and_item_from_var(const char *var, struct strbuf *name,
struct strbuf *item)
{
diff --git a/submodule-config.h b/submodule-config.h
index 958f320ac6c..b6133af71b0 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -89,6 +89,9 @@ int config_set_in_gitmodules_file_gently(const char *key, const char *value);
*/
int check_submodule_name(const char *name);
+/* Returns 0 if the URL valid per RFC3986 and -1 otherwise. */
+int check_submodule_url(const char *url);
+
/*
* Note: these helper functions exist solely to maintain backward
* compatibility with 'fetch' and 'update_clone' storing configuration in
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH 2/3] t7450: test submodule urls
2024-01-09 17:53 [PATCH 0/3] Strengthen fsck checks for submodule URLs Victoria Dye via GitGitGadget
2024-01-09 17:53 ` [PATCH 1/3] submodule-config.h: move check_submodule_url Victoria Dye via GitGitGadget
@ 2024-01-09 17:53 ` Victoria Dye via GitGitGadget
2024-01-09 21:38 ` Junio C Hamano
2024-01-10 10:38 ` Jeff King
2024-01-09 17:53 ` [PATCH 3/3] submodule-config.c: strengthen URL fsck check Victoria Dye via GitGitGadget
` (2 subsequent siblings)
4 siblings, 2 replies; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-09 17:53 UTC (permalink / raw)
To: git; +Cc: Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
Add a test to 't7450-bad-git-dotfiles.sh' to check the validity of different
submodule URLs. To test this directly (without setting up test repositories
& submodules), add a 'check-url' subcommand to 'test-tool submodule' that
calls 'check_submodule_url' in the same way that 'check-name' calls
'check_submodule_name'.
Mark the test with 'test_expect_failure' because, as it stands,
'check_submodule_url' marks certain invalid URLs valid. Specifically, the
invalid URL "http://example.com:test/foo.git" is incorrectly marked valid in
the test.
Signed-off-by: Victoria Dye <vdye@github.com>
---
t/helper/test-submodule.c | 31 +++++++++++++++++++++++++++----
t/t7450-bad-git-dotfiles.sh | 26 ++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c
index 50c154d0370..da89d265f0f 100644
--- a/t/helper/test-submodule.c
+++ b/t/helper/test-submodule.c
@@ -15,6 +15,13 @@ static const char *submodule_check_name_usage[] = {
NULL
};
+#define TEST_TOOL_CHECK_URL_USAGE \
+ "test-tool submodule check-url <url>"
+static const char *submodule_check_url_usage[] = {
+ TEST_TOOL_CHECK_URL_USAGE,
+ NULL
+};
+
#define TEST_TOOL_IS_ACTIVE_USAGE \
"test-tool submodule is-active <name>"
static const char *submodule_is_active_usage[] = {
@@ -36,22 +43,24 @@ static const char *submodule_usage[] = {
NULL
};
+typedef int (*check_fn_t)(const char *);
+
/*
* Exit non-zero if any of the submodule names given on the command line is
* invalid. If no names are given, filter stdin to print only valid names
* (which is primarily intended for testing).
*/
-static int check_name(int argc, const char **argv)
+static int check_submodule(int argc, const char **argv, check_fn_t check_fn)
{
if (argc > 1) {
while (*++argv) {
- if (check_submodule_name(*argv) < 0)
+ if (check_fn(*argv) < 0)
return 1;
}
} else {
struct strbuf buf = STRBUF_INIT;
while (strbuf_getline(&buf, stdin) != EOF) {
- if (!check_submodule_name(buf.buf))
+ if (!check_fn(buf.buf))
printf("%s\n", buf.buf);
}
strbuf_release(&buf);
@@ -69,7 +78,20 @@ static int cmd__submodule_check_name(int argc, const char **argv)
if (argc)
usage_with_options(submodule_check_name_usage, options);
- return check_name(argc, argv);
+ return check_submodule(argc, argv, check_submodule_name);
+}
+
+static int cmd__submodule_check_url(int argc, const char **argv)
+{
+ struct option options[] = {
+ OPT_END()
+ };
+ argc = parse_options(argc, argv, "test-tools", options,
+ submodule_check_url_usage, 0);
+ if (argc)
+ usage_with_options(submodule_check_url_usage, options);
+
+ return check_submodule(argc, argv, check_submodule_url);
}
static int cmd__submodule_is_active(int argc, const char **argv)
@@ -195,6 +217,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED)
static struct test_cmd cmds[] = {
{ "check-name", cmd__submodule_check_name },
+ { "check-url", cmd__submodule_check_url },
{ "is-active", cmd__submodule_is_active },
{ "resolve-relative-url", cmd__submodule_resolve_relative_url},
{ "config-list", cmd__submodule_config_list },
diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh
index 35a31acd4d7..0dbf13724f4 100755
--- a/t/t7450-bad-git-dotfiles.sh
+++ b/t/t7450-bad-git-dotfiles.sh
@@ -45,6 +45,32 @@ test_expect_success 'check names' '
test_cmp expect actual
'
+test_expect_failure 'check urls' '
+ cat >expect <<-\EOF &&
+ ./bar/baz/foo.git
+ https://example.com/foo.git
+ http://example.com:80/deeper/foo.git
+ EOF
+
+ test-tool submodule check-url >actual <<-\EOF &&
+ ./bar/baz/foo.git
+ https://example.com/foo.git
+ http://example.com:80/deeper/foo.git
+ -a./foo
+ ../../..//test/foo.git
+ ../../../../../:localhost:8080/foo.git
+ ..\../.\../:example.com/foo.git
+ ./%0ahost=example.com/foo.git
+ https://one.example.com/evil?%0ahost=two.example.com
+ https:///example.com/foo.git
+ http://example.com:test/foo.git
+ https::example.com/foo.git
+ http:::example.com/foo.git
+ EOF
+
+ test_cmp expect actual
+'
+
test_expect_success 'create innocent subrepo' '
git init innocent &&
git -C innocent commit --allow-empty -m foo
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH 2/3] t7450: test submodule urls
2024-01-09 17:53 ` [PATCH 2/3] t7450: test submodule urls Victoria Dye via GitGitGadget
@ 2024-01-09 21:38 ` Junio C Hamano
2024-01-11 17:23 ` Victoria Dye
2024-01-10 10:38 ` Jeff King
1 sibling, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2024-01-09 21:38 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:
> +#define TEST_TOOL_CHECK_URL_USAGE \
> + "test-tool submodule check-url <url>"
> +static const char *submodule_check_url_usage[] = {
> + TEST_TOOL_CHECK_URL_USAGE,
> + NULL
> +};
Granted, the entry that follows this new one already uses the same
pattern, but TEST_TOOL_CHECK_URL_USAGE being used only once here and
nowhere else, with its name almost as long as the value it expands to,
I found it unnecessarily verbose and confusing.
> #define TEST_TOOL_IS_ACTIVE_USAGE \
> "test-tool submodule is-active <name>"
> static const char *submodule_is_active_usage[] = {
> +typedef int (*check_fn_t)(const char *);
> +
> /*
> * Exit non-zero if any of the submodule names given on the command line is
> * invalid. If no names are given, filter stdin to print only valid names
> * (which is primarily intended for testing).
> */
OK. As long as each of the input lines are unique, we can use the
usual "does the actual output match the expected?" to test many of
them at once, and notice if there is an extra one in the output that
shouldn't have been emitted, or there is a missing one that should
have.
> -static int check_name(int argc, const char **argv)
> +static int check_submodule(int argc, const char **argv, check_fn_t check_fn)
> {
> if (argc > 1) {
> while (*++argv) {
> - if (check_submodule_name(*argv) < 0)
> + if (check_fn(*argv) < 0)
Quite nice way to reuse what we already have, thanks to [1/3].
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 2/3] t7450: test submodule urls
2024-01-09 21:38 ` Junio C Hamano
@ 2024-01-11 17:23 ` Victoria Dye
0 siblings, 0 replies; 31+ messages in thread
From: Victoria Dye @ 2024-01-11 17:23 UTC (permalink / raw)
To: Junio C Hamano, Victoria Dye via GitGitGadget; +Cc: git
Junio C Hamano wrote:
> "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> +#define TEST_TOOL_CHECK_URL_USAGE \
>> + "test-tool submodule check-url <url>"
>> +static const char *submodule_check_url_usage[] = {
>> + TEST_TOOL_CHECK_URL_USAGE,
>> + NULL
>> +};
>
> Granted, the entry that follows this new one already uses the same
> pattern, but TEST_TOOL_CHECK_URL_USAGE being used only once here and
> nowhere else, with its name almost as long as the value it expands to,
> I found it unnecessarily verbose and confusing.
This is only used once because I missed the second place it should be used
(in 'submodule_usage[]'). It's still somewhat verbose, but once I fix that
it'll at least have the benefit of avoiding some duplication.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 2/3] t7450: test submodule urls
2024-01-09 17:53 ` [PATCH 2/3] t7450: test submodule urls Victoria Dye via GitGitGadget
2024-01-09 21:38 ` Junio C Hamano
@ 2024-01-10 10:38 ` Jeff King
2024-01-11 16:54 ` Victoria Dye
1 sibling, 1 reply; 31+ messages in thread
From: Jeff King @ 2024-01-10 10:38 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
On Tue, Jan 09, 2024 at 05:53:36PM +0000, Victoria Dye via GitGitGadget wrote:
> +#define TEST_TOOL_CHECK_URL_USAGE \
> + "test-tool submodule check-url <url>"
I don't think this command-line "<url>" mode works at all. Your
underlying function can handle either stdin or arguments:
> -static int check_name(int argc, const char **argv)
> +static int check_submodule(int argc, const char **argv, check_fn_t check_fn)
> {
> if (argc > 1) {
> while (*++argv) {
> - if (check_submodule_name(*argv) < 0)
> + if (check_fn(*argv) < 0)
> return 1;
> }
> } else {
> struct strbuf buf = STRBUF_INIT;
> while (strbuf_getline(&buf, stdin) != EOF) {
> - if (!check_submodule_name(buf.buf))
> + if (!check_fn(buf.buf))
> printf("%s\n", buf.buf);
> }
> strbuf_release(&buf);
...but the new caller rejects them before we get there:
> +static int cmd__submodule_check_url(int argc, const char **argv)
> +{
> + struct option options[] = {
> + OPT_END()
> + };
> + argc = parse_options(argc, argv, "test-tools", options,
> + submodule_check_url_usage, 0);
> + if (argc)
> + usage_with_options(submodule_check_url_usage, options);
> +
> + return check_submodule(argc, argv, check_submodule_url);
> }
So you'd want at least:
diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c
index da89d265f0..6b964c88ab 100644
--- a/t/helper/test-submodule.c
+++ b/t/helper/test-submodule.c
@@ -88,8 +88,6 @@ static int cmd__submodule_check_url(int argc, const char **argv)
};
argc = parse_options(argc, argv, "test-tools", options,
submodule_check_url_usage, 0);
- if (argc)
- usage_with_options(submodule_check_url_usage, options);
return check_submodule(argc, argv, check_submodule_url);
}
but then that reveals another mismatch. In check_submodule() above we
expect argv[0] to be uninteresting (i.e., the name of the program), but
parse_options() will already have thrown it away. So we silently fail to
check the first option (which is especially bad since the only output is
the exit code, and thus the skipped one looks the same as one that
validated correctly).
All of this is inherited from the existing check_name() code, which I
think has all of the same bugs. The test scripts all just use the stdin
mode, so they don't notice. It's not too hard to fix, but maybe it's
worth just ripping out the unreachable code.
-Peff
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH 2/3] t7450: test submodule urls
2024-01-10 10:38 ` Jeff King
@ 2024-01-11 16:54 ` Victoria Dye
2024-01-12 6:57 ` Jeff King
0 siblings, 1 reply; 31+ messages in thread
From: Victoria Dye @ 2024-01-11 16:54 UTC (permalink / raw)
To: Jeff King, Victoria Dye via GitGitGadget; +Cc: git
Jeff King wrote:
> On Tue, Jan 09, 2024 at 05:53:36PM +0000, Victoria Dye via GitGitGadget wrote:
>
>> +#define TEST_TOOL_CHECK_URL_USAGE \
>> + "test-tool submodule check-url <url>"
>
> I don't think this command-line "<url>" mode works at all. Your
> underlying function can handle either stdin or arguments:
...
> All of this is inherited from the existing check_name() code, which I
> think has all of the same bugs. The test scripts all just use the stdin
> mode, so they don't notice. It's not too hard to fix, but maybe it's
> worth just ripping out the unreachable code.
Thanks for pointing out those issues, I think removing the command line
input mode is the way to go. The description of the 'check_name()' mentions
that the stdin mode was "primarily intended for testing". But as 85321a346b5
(submodule--helper: move "check-name" to a test-tool, 2022-09-01) pointed
out, 'check_name()' was never used outside of tests anyway, so whatever use
case was imagined for the command line mode never seemed to have existed.
Combine that with the fact that the command line mode is so different from
the stdin mode (non-zero exit code for invalid names, prints nothing vs.
zero exit code, prints valid names), there don't seem to be any real
downsides to removing the unused code.
>
> -Peff
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 2/3] t7450: test submodule urls
2024-01-11 16:54 ` Victoria Dye
@ 2024-01-12 6:57 ` Jeff King
0 siblings, 0 replies; 31+ messages in thread
From: Jeff King @ 2024-01-12 6:57 UTC (permalink / raw)
To: Victoria Dye; +Cc: Victoria Dye via GitGitGadget, git
On Thu, Jan 11, 2024 at 08:54:47AM -0800, Victoria Dye wrote:
> > All of this is inherited from the existing check_name() code, which I
> > think has all of the same bugs. The test scripts all just use the stdin
> > mode, so they don't notice. It's not too hard to fix, but maybe it's
> > worth just ripping out the unreachable code.
>
> Thanks for pointing out those issues, I think removing the command line
> input mode is the way to go. The description of the 'check_name()' mentions
> that the stdin mode was "primarily intended for testing". But as 85321a346b5
> (submodule--helper: move "check-name" to a test-tool, 2022-09-01) pointed
> out, 'check_name()' was never used outside of tests anyway, so whatever use
> case was imagined for the command line mode never seemed to have existed.
>
> Combine that with the fact that the command line mode is so different from
> the stdin mode (non-zero exit code for invalid names, prints nothing vs.
> zero exit code, prints valid names), there don't seem to be any real
> downsides to removing the unused code.
That sounds like a good plan to me. :)
-Peff
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH 3/3] submodule-config.c: strengthen URL fsck check
2024-01-09 17:53 [PATCH 0/3] Strengthen fsck checks for submodule URLs Victoria Dye via GitGitGadget
2024-01-09 17:53 ` [PATCH 1/3] submodule-config.h: move check_submodule_url Victoria Dye via GitGitGadget
2024-01-09 17:53 ` [PATCH 2/3] t7450: test submodule urls Victoria Dye via GitGitGadget
@ 2024-01-09 17:53 ` Victoria Dye via GitGitGadget
2024-01-09 21:57 ` Junio C Hamano
2024-01-10 6:33 ` Patrick Steinhardt
2024-01-10 10:23 ` [PATCH 0/3] Strengthen fsck checks for submodule URLs Jeff King
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
4 siblings, 2 replies; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-09 17:53 UTC (permalink / raw)
To: git; +Cc: Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
Update the validation of "curl URL" submodule URLs (i.e. those that specify
an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more
invalid URLs. The existing validation using 'credential_from_url_gently()'
parses certain URLs incorrectly, leading to invalid submodule URLs passing
'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote
URLs in 'remote_get()' - correctly identifies the invalid URLs missed by
'credential_from_url_gently()'.
To catch more invalid cases, replace 'credential_from_url_gently()' with
'url_normalize()' followed by a 'url_decode()' and a check for newlines
(mirroring 'check_url_component()' in the 'credential_from_url_gently()'
validation).
Signed-off-by: Victoria Dye <vdye@github.com>
---
submodule-config.c | 16 +++++++++++-----
t/t7450-bad-git-dotfiles.sh | 2 +-
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/submodule-config.c b/submodule-config.c
index 3b295e9f89c..54130f6a385 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -15,7 +15,7 @@
#include "thread-utils.h"
#include "tree-walk.h"
#include "url.h"
-#include "credential.h"
+#include "urlmatch.h"
/*
* submodule cache lookup structure
@@ -350,12 +350,18 @@ int check_submodule_url(const char *url)
}
else if (url_to_curl_url(url, &curl_url)) {
- struct credential c = CREDENTIAL_INIT;
int ret = 0;
- if (credential_from_url_gently(&c, curl_url, 1) ||
- !*c.host)
+ char *normalized = url_normalize(curl_url, NULL);
+ if (normalized) {
+ char *decoded = url_decode(normalized);
+ if (strchr(decoded, '\n'))
+ ret = -1;
+ free(normalized);
+ free(decoded);
+ } else {
ret = -1;
- credential_clear(&c);
+ }
+
return ret;
}
diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh
index 0dbf13724f4..46d4fb0354b 100755
--- a/t/t7450-bad-git-dotfiles.sh
+++ b/t/t7450-bad-git-dotfiles.sh
@@ -45,7 +45,7 @@ test_expect_success 'check names' '
test_cmp expect actual
'
-test_expect_failure 'check urls' '
+test_expect_success 'check urls' '
cat >expect <<-\EOF &&
./bar/baz/foo.git
https://example.com/foo.git
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH 3/3] submodule-config.c: strengthen URL fsck check
2024-01-09 17:53 ` [PATCH 3/3] submodule-config.c: strengthen URL fsck check Victoria Dye via GitGitGadget
@ 2024-01-09 21:57 ` Junio C Hamano
2024-01-10 6:33 ` Patrick Steinhardt
1 sibling, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2024-01-09 21:57 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:
> From: Victoria Dye <vdye@github.com>
>
> Update the validation of "curl URL" submodule URLs (i.e. those that specify
> an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more
> invalid URLs. The existing validation using 'credential_from_url_gently()'
> parses certain URLs incorrectly, leading to invalid submodule URLs passing
> 'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote
> URLs in 'remote_get()' - correctly identifies the invalid URLs missed by
> 'credential_from_url_gently()'.
>
> To catch more invalid cases, replace 'credential_from_url_gently()' with
> 'url_normalize()' followed by a 'url_decode()' and a check for newlines
> (mirroring 'check_url_component()' in the 'credential_from_url_gently()'
> validation).
Thanks. Left hand and right hand checking the same thing in
different ways and coming up with different result is never a happy
situation. Making sure we consistently use the same definition of
what the valid URLs are is a very welcome thing to do, of course.
> -test_expect_failure 'check urls' '
> +test_expect_success 'check urls' '
> cat >expect <<-\EOF &&
> ./bar/baz/foo.git
> https://example.com/foo.git
It is a bit unfortunate that from here we cannot tell which bogus
URLs in this test that were incorrectly accepted are now rejected.
Among the many bogus URLs in the input, we used to allow
http://example.com:test/foo.git
(we do not accept non-numeric representation of port numbers, so
http://example.com:http/foo.git would also be rejected), but with
this change, it is now rejected. All the other bogus ones are
rejected just as before this change.
Will queue. Thanks.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 3/3] submodule-config.c: strengthen URL fsck check
2024-01-09 17:53 ` [PATCH 3/3] submodule-config.c: strengthen URL fsck check Victoria Dye via GitGitGadget
2024-01-09 21:57 ` Junio C Hamano
@ 2024-01-10 6:33 ` Patrick Steinhardt
2024-01-17 21:19 ` Victoria Dye
1 sibling, 1 reply; 31+ messages in thread
From: Patrick Steinhardt @ 2024-01-10 6:33 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
[-- Attachment #1: Type: text/plain, Size: 1426 bytes --]
On Tue, Jan 09, 2024 at 05:53:37PM +0000, Victoria Dye via GitGitGadget wrote:
> From: Victoria Dye <vdye@github.com>
>
> Update the validation of "curl URL" submodule URLs (i.e. those that specify
> an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more
> invalid URLs. The existing validation using 'credential_from_url_gently()'
> parses certain URLs incorrectly, leading to invalid submodule URLs passing
> 'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote
> URLs in 'remote_get()' - correctly identifies the invalid URLs missed by
> 'credential_from_url_gently()'.
Okay, so we retain the wrong behavior of `credential_from_url_gently()`,
right? I wonder whether this can be abused in any way, doubly so because
the function gets invoked with untrusted input from the remote server
when we handle redirects in `http_request_reauth()`. But the redirect
URL we end up passing to `credential_from_url_gently()` would have to
contain a non-numeric port, and curl seemingly does not know to handle
those either.
Other callsites include fsck (which you're fixing) and the credential
store (which is entirely user-controlled). It would be great regardless
to fix the underlying bug in `credential_from_url_gently()` eventually
though. But I do not think that this has to be part this patch series
here, which is a strict improvement.
Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 3/3] submodule-config.c: strengthen URL fsck check
2024-01-10 6:33 ` Patrick Steinhardt
@ 2024-01-17 21:19 ` Victoria Dye
0 siblings, 0 replies; 31+ messages in thread
From: Victoria Dye @ 2024-01-17 21:19 UTC (permalink / raw)
To: Patrick Steinhardt, Victoria Dye via GitGitGadget; +Cc: git
Patrick Steinhardt wrote:
> On Tue, Jan 09, 2024 at 05:53:37PM +0000, Victoria Dye via GitGitGadget wrote:
>> From: Victoria Dye <vdye@github.com>
>>
>> Update the validation of "curl URL" submodule URLs (i.e. those that specify
>> an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more
>> invalid URLs. The existing validation using 'credential_from_url_gently()'
>> parses certain URLs incorrectly, leading to invalid submodule URLs passing
>> 'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote
>> URLs in 'remote_get()' - correctly identifies the invalid URLs missed by
>> 'credential_from_url_gently()'.
>
> Okay, so we retain the wrong behavior of `credential_from_url_gently()`,
> right? I wonder whether this can be abused in any way, doubly so because
> the function gets invoked with untrusted input from the remote server
> when we handle redirects in `http_request_reauth()`. But the redirect
> URL we end up passing to `credential_from_url_gently()` would have to
> contain a non-numeric port, and curl seemingly does not know to handle
> those either.
Correct, nothing about 'credential_from_url_gently()' changes here. As for
whether it could be abused - I don't *think* so, but I'm definitely not a
security expert. If it helps, here's a more detailed breakdown of the issue:
In 'credential_from_url_1()', suppose we have URL
"http://example.com:test/repo.git". Stepping through the variables:
- 'cp' is "example.com:test/repo.git"
- 'at' is NULL
- 'colon' is ":test/repo.git"
- 'slash' is "/repo.git"
Because 'at' is NULL, we set 'host = cp'. Later, because 'slash - host > 0',
we call 'url_decode_mem()' on "example.com:test" (which, in this case,
doesn't change anything) and the result 'host' to "example.com:test".
The issue for the fsck check is that 'credential_from_url_gently()' doesn't
validate the hostname it extracts (e.g. whether ':' precedes a valid port,
or if the hostname contains a '%'-escaped sequence). I don't *think* that
could be abused since, like you said, cURL should just reject the invalid
URL altogether.
>
> Other callsites include fsck (which you're fixing) and the credential
> store (which is entirely user-controlled). It would be great regardless
> to fix the underlying bug in `credential_from_url_gently()` eventually
> though. But I do not think that this has to be part this patch series
> here, which is a strict improvement.
Agreed! I think normalizing the URL before trying to extract the credentials
may be all that's needed to avoid surprise URL errors, but that probably
warrants a separate patch submission (with appropriately thorough testing).
>
> Thanks!
>
> Patrick
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-01-09 17:53 [PATCH 0/3] Strengthen fsck checks for submodule URLs Victoria Dye via GitGitGadget
` (2 preceding siblings ...)
2024-01-09 17:53 ` [PATCH 3/3] submodule-config.c: strengthen URL fsck check Victoria Dye via GitGitGadget
@ 2024-01-10 10:23 ` Jeff King
2024-11-13 19:24 ` Neil Mayhew
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
4 siblings, 1 reply; 31+ messages in thread
From: Jeff King @ 2024-01-10 10:23 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
On Tue, Jan 09, 2024 at 05:53:34PM +0000, Victoria Dye via GitGitGadget wrote:
> While testing 'git fsck' checks on .gitmodules URLs, I noticed that some
> invalid URLs were passing the checks. Digging into it a bit more, the issue
> turned out to be that 'credential_from_url_gently()' parses certain URLs
> (like "http://example.com:something/deeper/path") incorrectly, in a way that
> appeared to return a valid result.
I don't think that checks was ever intended to be an overall URL-quality
check. The reason we used the credential code in the fsck check is that
we were checking for URLs which triggered a specific credential-related
vulnerability.
I don't mind tightening things further as long as:
1. We are not allowing any cases that the credential code would have
forbidden (i.e., something that might let the vulnerability slip
through, since ultimately it is the credential code which will need
to be protected). You ported over the newline check, which is the
main thing. It's possible that there is some difference between the
two parsers that may allow an invalid input to create a newline for
one but not the other, but having now looked over the code, I don't
think so.
And I think one could argue that the security-importance of the
fsck check has mostly run its course. The real fix was in the
credential code itself, and the matching fsck change was mostly
about protecting downstream clients until they were upgraded. Now
that it's been several years, there's not as much value there.
2. It is not making it harder for users to work with repositories that
may contain malformed URLs that _aren't_ vulnerabilities. It sounds
like the specific cases you found already don't work at all with
Git, so presumably nobody is using them. By making it an fsck
check, though, any mistakes that are embedded in history (even if
they are now corrected) will make it a pain to use the repository
with sites that enable transfer.fsckObjects.
My gut feeling is that this is probably OK in practice. If it does
cause pain, we might consider loosening the fsck.gitmodulesUrl
severity (under the notion from above that it is no longer a
critical security check). But if it doesn't cause real-world pain,
being pickier is probably better (it may save us from a
vulnerability down the road).
-Peff
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-01-10 10:23 ` [PATCH 0/3] Strengthen fsck checks for submodule URLs Jeff King
@ 2024-11-13 19:24 ` Neil Mayhew
2024-11-13 19:44 ` Neil Mayhew
2024-11-13 22:40 ` Junio C Hamano
0 siblings, 2 replies; 31+ messages in thread
From: Neil Mayhew @ 2024-11-13 19:24 UTC (permalink / raw)
To: Jeff King, Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
[-- Attachment #1.1: Type: text/plain, Size: 1509 bytes --]
On 10 Jan 24 03:23, Jeff King wrote:
> By making it an fsck
> check, though, any mistakes that are embedded in history (even if
> they are now corrected) will make it a pain to use the repository
> with sites that enable transfer.fsckObjects.
>
> My gut feeling is that this is probably OK in practice. If it does
> cause pain, we might consider loosening the fsck.gitmodulesUrl
> severity (under the notion from above that it is no longer a
> critical security check). But if it doesn't cause real-world pain,
> being pickier is probably better (it may save us from a
> vulnerability down the road).
This pain is happening in
https://github.com/IntersectMBO/cardano-ledger.git, a large open-source
repo. There was a bad edit to .gitmodules which was immediately
corrected by another commit. However, the bad commit is still in the
history. It happened 6 years ago, so there's no possibility of us
changing the history. We just spent time investigating a bug report from
someone who was unable to clone the repo, and eventually we discovered
that they had transfer.fsckObjects enabled. Even without this option,
however, we still want people to be able to run fsck successfully on the
repo.
It's awkward that our repo now won't pass an fsck check and we have no
way to correct that. I'd really like not to have to put a note in the
README warning about this.
Is there any possibility of "loosening the fsck.gitmodulesUrl severity",
as Jeff suggested?
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 840 bytes --]
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-13 19:24 ` Neil Mayhew
@ 2024-11-13 19:44 ` Neil Mayhew
2024-11-13 22:40 ` Junio C Hamano
1 sibling, 0 replies; 31+ messages in thread
From: Neil Mayhew @ 2024-11-13 19:44 UTC (permalink / raw)
To: Jeff King, Victoria Dye via GitGitGadget; +Cc: git, Victoria Dye
On 13 Nov 24 12:24, Neil Mayhew wrote:
> This pain is happening in
> https://github.com/IntersectMBO/cardano-ledger.git, a large
> open-source repo.
In case it's helpful, here's the script I wrote to reproduce the problem
minimally in a fresh repo:
https://gist.github.com/neilmayhew/f01dd8f807ebc0bdd37c4db154eabf64
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-13 19:24 ` Neil Mayhew
2024-11-13 19:44 ` Neil Mayhew
@ 2024-11-13 22:40 ` Junio C Hamano
2024-11-14 0:10 ` Jeff King
2024-11-14 0:27 ` Neil Mayhew
1 sibling, 2 replies; 31+ messages in thread
From: Junio C Hamano @ 2024-11-13 22:40 UTC (permalink / raw)
To: Neil Mayhew; +Cc: Jeff King, Victoria Dye via GitGitGadget, git, Victoria Dye
Neil Mayhew <neil@mayhew.name> writes:
> ... immediately corrected by another commit. However, the bad commit is
> still in the history. It happened 6 years ago, so there's no
> possibility of us changing the history.
I think fsck.skipList was meant to cover such a case. The idea is
that the blob object name of the bad .gitmodules file can be placed
on the list, and the rest of the "bad commit" and the whole history
can still be checked for consistency, without triggering the warning
(or error) resulting from the offending .gitmodules file.
> Is there any possibility of "loosening the fsck.gitmodulesUrl
> severity", as Jeff suggested?
Isn't the suggestion not about butchering the rest of the world but
by locally configuring fsck.gitmodulesUrl down from error to
warning? I personally think excluding a single known-offending blob
without doing such loosening is a much better idea in that it
prevents *new* offending instances from getting into the repository,
while allowing an existing benign and honest mistake to stay in your
history. Loosening the severity of a class of check means you will
accept *new* offending instances, which may very well be malicious,
unlike the existing benign one you know about.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-13 22:40 ` Junio C Hamano
@ 2024-11-14 0:10 ` Jeff King
2024-11-14 0:51 ` Neil Mayhew
2024-11-14 2:20 ` Junio C Hamano
2024-11-14 0:27 ` Neil Mayhew
1 sibling, 2 replies; 31+ messages in thread
From: Jeff King @ 2024-11-14 0:10 UTC (permalink / raw)
To: Junio C Hamano
Cc: Neil Mayhew, Victoria Dye via GitGitGadget, git, Victoria Dye
On Thu, Nov 14, 2024 at 07:40:02AM +0900, Junio C Hamano wrote:
> > Is there any possibility of "loosening the fsck.gitmodulesUrl
> > severity", as Jeff suggested?
>
> Isn't the suggestion not about butchering the rest of the world but
> by locally configuring fsck.gitmodulesUrl down from error to
> warning? I personally think excluding a single known-offending blob
> without doing such loosening is a much better idea in that it
> prevents *new* offending instances from getting into the repository,
> while allowing an existing benign and honest mistake to stay in your
> history. Loosening the severity of a class of check means you will
> accept *new* offending instances, which may very well be malicious,
> unlike the existing benign one you know about.
The trouble with configuring fsck.gitmodulesUrl yourself (or using
skipList, which I agree is better if you can do it) is that it only
helps your local repo, and not:
1. Hosting sites which may need special work-arounds to let you push up
to them, since they are using receive.fsckObjects.
In theory this is a good thing, because it prevents dumb mistakes
from getting distributed in the first place. But it also is a pain
for projects with established history.
2. All of the people who are going to clone your repo, who might need
to follow special instructions.
The only reason this hasn't been a huge pain in practice is that
almost nobody turns on transfer.fsckObjects in the first place. In
theory the people who do turn it on know enough to examine the
objects themselves and decide if it's OK. I don't know how true that
is in practice, though (and certainly it would be nice to turn this
feature on by default, but I do worry about people getting caught up
in exactly these kind of historical messes).
We did add the gitmoduleUrl check to help with malicious URLs. But it
was always an extra layer of defense over the real fix, which was in the
credential code. It's _possible_ that a newly discovered vulnerability
will be protected by the existing fsck check, but I'm a little skeptical
about its security value at this point (especially because hardly
anybody runs it locally, and protection on the hosting sites isn't that
hard to work around).
So if it's causing people real pain in practice, I think there could be
an argument for downgrading the check to a warning. I don't have a
strong feeling that we _should_ do that, only that I don't personally
reject it immediately as an option.
-Peff
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-14 0:10 ` Jeff King
@ 2024-11-14 0:51 ` Neil Mayhew
2024-11-14 2:20 ` Junio C Hamano
1 sibling, 0 replies; 31+ messages in thread
From: Neil Mayhew @ 2024-11-14 0:51 UTC (permalink / raw)
To: Jeff King, Junio C Hamano
Cc: Victoria Dye via GitGitGadget, git, Victoria Dye
On 13 Nov 24 17:10, Jeff King wrote:
My previous message crossed with Jeff's, and he already addressed most
of what I was saying.
> 2. All of the people who are going to clone your repo, who might need
> to follow special instructions.
>
> The only reason this hasn't been a huge pain in practice is that
> almost nobody turns on transfer.fsckObjects in the first place. In
> theory the people who do turn it on know enough to examine the
> objects themselves and decide if it's OK. I don't know how true that
> is in practice, though (and certainly it would be nice to turn this
> feature on by default, but I do worry about people getting caught up
> in exactly these kind of historical messes).
This is what happened in our situation. The person had
transfer.fsckObjects enabled but didn't realize that this was the cause
of the error. They assumed that the repo's *current* submodule
configuration was corrupt and was somehow causing the clone to fail even
though they tried explicitly turning off submodule recursion.
> We did add the gitmoduleUrl check to help with malicious URLs. But it
> was always an extra layer of defense over the real fix, which was in the
> credential code. It's _possible_ that a newly discovered vulnerability
> will be protected by the existing fsck check, but I'm a little skeptical
> about its security value at this point (especially because hardly
> anybody runs it locally, and protection on the hosting sites isn't that
> hard to work around).
I also think it's surprising to have fsck check the *content* of blobs
rather than just the relationships between them, and to give a blob
named .gitmodules special treatment. It goes against the philosophy of
"do one thing well". I feel that there should be a separate tool for
checking repos for security vulnerabilities, and it could be given
additional capabilities (such as checking the configuration as well as
the objects).
> So if it's causing people real pain in practice, I think there could be
> an argument for downgrading the check to a warning. I don't have a
> strong feeling that we _should_ do that, only that I don't personally
> reject it immediately as an option.
Perhaps there could be some additional warnings in the documentation for
transfer.fsckObjects to make people aware of the potential costs of
using it, particularly the existence of legacy issues in established
repos that would prevent cloning unless some of the fsck.<msg-id> values
are set to warn. The documentation currently just says "see
fsck.<msg-id>" and in my case, despite being fairly familiar with git,
that didn't give me enough to go on while investigating this.
It might also help to give some guidance on how to track down the object
name(s) that fsck lists, for example by using git log --raw --all
--find-object=<NAME>. This would help a user to make a more informed
decision on how to handle such a situation if it arises. In our case,
once we did this it was quickly obvious that the problem was a
historical error that had since been fixed rather than a current problem.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-14 0:10 ` Jeff King
2024-11-14 0:51 ` Neil Mayhew
@ 2024-11-14 2:20 ` Junio C Hamano
2024-11-14 19:11 ` Neil Mayhew
1 sibling, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2024-11-14 2:20 UTC (permalink / raw)
To: Jeff King; +Cc: Neil Mayhew, Victoria Dye via GitGitGadget, git, Victoria Dye
Jeff King <peff@peff.net> writes:
> ... I'm a little skeptical
> about its security value at this point (especially because hardly
> anybody runs it locally, and protection on the hosting sites isn't that
> hard to work around).
>
> So if it's causing people real pain in practice, I think there could be
> an argument for downgrading the check to a warning. I don't have a
> strong feeling that we _should_ do that, only that I don't personally
> reject it immediately as an option.
Oh, I see. I do not think I have strong objection, either.
Thanks.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-14 2:20 ` Junio C Hamano
@ 2024-11-14 19:11 ` Neil Mayhew
0 siblings, 0 replies; 31+ messages in thread
From: Neil Mayhew @ 2024-11-14 19:11 UTC (permalink / raw)
To: Junio C Hamano, Jeff King
Cc: Victoria Dye via GitGitGadget, git, Victoria Dye
On 13 Nov 24 19:20, Junio C Hamano wrote:
> Jeff King <peff@peff.net> writes:
>
>> So if it's causing people real pain in practice, I think there could be
>> an argument for downgrading the check to a warning. I don't have a
>> strong feeling that we _should_ do that, only that I don't personally
>> reject it immediately as an option.
>
> Oh, I see. I do not think I have strong objection, either.
So if I was to submit a patch making this downgrade, is there a good
chance it would be accepted?
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH 0/3] Strengthen fsck checks for submodule URLs
2024-11-13 22:40 ` Junio C Hamano
2024-11-14 0:10 ` Jeff King
@ 2024-11-14 0:27 ` Neil Mayhew
1 sibling, 0 replies; 31+ messages in thread
From: Neil Mayhew @ 2024-11-14 0:27 UTC (permalink / raw)
To: Junio C Hamano
Cc: Jeff King, Victoria Dye via GitGitGadget, git, Victoria Dye
On 13 Nov 24 15:40, Junio C Hamano wrote:
> Neil Mayhew <neil@mayhew.name> writes:
>
>> ... immediately corrected by another commit. However, the bad commit is
>> still in the history. It happened 6 years ago, so there's no
>> possibility of us changing the history.
>
> I think fsck.skipList was meant to cover such a case. The idea is
> that the blob object name of the bad .gitmodules file can be placed
> on the list, and the rest of the "bad commit" and the whole history
> can still be checked for consistency, without triggering the warning
> (or error) resulting from the offending .gitmodules file.
Thank you. This explanation is very helpful. I hadn't noticed the
existence of this config option, and using it does indeed enable fsck to
pass on our repo.
>> Is there any possibility of "loosening the fsck.gitmodulesUrl
>> severity", as Jeff suggested?
>
> Isn't the suggestion not about butchering the rest of the world but
> by locally configuring fsck.gitmodulesUrl down from error to
> warning?
Again, thank you for the helpful explanation. I hadn't fully understood
what was being suggested.
> I personally think excluding a single known-offending blob
> without doing such loosening is a much better idea in that it
> prevents *new* offending instances from getting into the repository,
> while allowing an existing benign and honest mistake to stay in your
> history. Loosening the severity of a class of check means you will
> accept *new* offending instances, which may very well be malicious,
> unlike the existing benign one you know about.
I agree that skipping a single object is better than loosening.
However, there is a bit of a catch-22 situation here. The problem in our
repo was reported to us by someone who was unable to clone, because they
have transfer.fsckObjects set globally (and initially didn't realise
that was where the error message was coming from). Someone in that
situation could in addition set fetch.fsck.gitmodulesUrl=warn globally,
but as you say that's too all-encompassing and it would be better to
skip the specific object. Unfortunately, this is difficult to do when
cloning, because the configuration value doesn't come automatically with
the repo. A local skiplist configuration value can't exist until after
the repo has been cloned, and adding the object name to a global
skiplist doesn't seem appropriate. It's possible to add the option to
the clone command line (with -c) but that's starting to get quite messy
and in any case many people wouldn't see the instruction even if we put
it in our README.
I don't think there's a perfect solution here. It seems like the best we
can do is to put the object name in a .git-fsck-skiplist file at the top
level of the repo, and add a section at the end of the README telling
people to configure it for all three *fsck.skipList values if they want
to use git fsck explicitly, or implicitly via other configuration
values. We would also need to mention using -c when cloning initially.
This information will probably be intimidating for most people, and I
wish we didn't have to include it, but hopefully we can make it clear
that most people won't need it.
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 0/4] Strengthen fsck checks for submodule URLs
2024-01-09 17:53 [PATCH 0/3] Strengthen fsck checks for submodule URLs Victoria Dye via GitGitGadget
` (3 preceding siblings ...)
2024-01-10 10:23 ` [PATCH 0/3] Strengthen fsck checks for submodule URLs Jeff King
@ 2024-01-18 1:55 ` Victoria Dye via GitGitGadget
2024-01-18 1:55 ` [PATCH v2 1/4] submodule-config.h: move check_submodule_url Victoria Dye via GitGitGadget
` (4 more replies)
4 siblings, 5 replies; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-18 1:55 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Jeff King, Victoria Dye
While testing 'git fsck' checks on .gitmodules URLs, I noticed that some
invalid URLs were passing the checks. Digging into it a bit more, the issue
turned out to be that 'credential_from_url_gently()' parses certain URLs
(like "http://example.com:something/deeper/path") incorrectly, in a way that
appeared to return a valid result.
Fortunately, these URLs are rejected in fetches/clones/pushes anyway because
'url_normalize()' (called in 'validate_remote_url()') correctly identifies
them as invalid. So, to bring 'git fsck' in line with other (stronger)
validation done on remote URLs, this series replaces the
'credential_from_url_gently()' check with one that uses 'url_normalize()'.
* Patch 1 moves 'check_submodule_url()' to a public location so that it can
be used outside of 'fsck.c'.
* Patch 2 removes the obsolete/never-used code in 'test-tool submodule
check-name' handling names provided on the command line.
* Patch 3 adds a 'check-url' mode to 'test-tool submodule', calling the
now-public 'check_submodule_url()' method on a given URL, and adds new
tests checking valid and invalid submodule URLs.
* Patch 4 replaces the 'credential_from_url_gently()' check with
'url_normalize()' followed by 'url_decode()' and an explicit check for
newlines (to preserve the newline handling added in 07259e74ec1 (fsck:
detect gitmodules URLs with embedded newlines, 2020-03-11)).
Changes since V1
================
* Added 'TEST_TOOL_CHECK_URL_USAGE' to 'submodule_usage'.
* Removed unused/unreachable code related to command line inputs in
'test-tool submodule check-name' and 'test-tool submodule check-url'.
* Split the new t7450 test case into two tests (the first contains URLs
that are validated successfully, the second demonstrates a URL
incorrectly marked valid) to clearly show which pattern is handled
improperly. The tests are merged in the final patch once the validation
is corrected.
Thanks!
* Victoria
Victoria Dye (4):
submodule-config.h: move check_submodule_url
test-submodule: remove command line handling for check-name
t7450: test submodule urls
submodule-config.c: strengthen URL fsck check
fsck.c | 133 ----------------------------------
submodule-config.c | 140 ++++++++++++++++++++++++++++++++++++
submodule-config.h | 3 +
t/helper/test-submodule.c | 52 +++++++++-----
t/t7450-bad-git-dotfiles.sh | 26 +++++++
5 files changed, 203 insertions(+), 151 deletions(-)
base-commit: a54a84b333adbecf7bc4483c0e36ed5878cac17b
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1635%2Fvdye%2Fvdye%2Fstrengthen-fsck-url-checks-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1635/vdye/vdye/strengthen-fsck-url-checks-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/1635
Range-diff vs v1:
1: 588de3022d7 = 1: ce1de0406ef submodule-config.h: move check_submodule_url
-: ----------- > 2: 14e8834c38b test-submodule: remove command line handling for check-name
2: cf7848edffc ! 3: b6843a58389 t7450: test submodule urls
@@ Metadata
## Commit message ##
t7450: test submodule urls
- Add a test to 't7450-bad-git-dotfiles.sh' to check the validity of different
- submodule URLs. To test this directly (without setting up test repositories
- & submodules), add a 'check-url' subcommand to 'test-tool submodule' that
- calls 'check_submodule_url' in the same way that 'check-name' calls
- 'check_submodule_name'.
+ Add tests to 't7450-bad-git-dotfiles.sh' to check the validity of different
+ submodule URLs. To verify this directly (without setting up test
+ repositories & submodules), add a 'check-url' subcommand to 'test-tool
+ submodule' that calls 'check_submodule_url' in the same way that
+ 'check-name' calls 'check_submodule_name'.
- Mark the test with 'test_expect_failure' because, as it stands,
- 'check_submodule_url' marks certain invalid URLs valid. Specifically, the
- invalid URL "http://example.com:test/foo.git" is incorrectly marked valid in
- the test.
+ Add two tests to separately address cases where the URL check correctly
+ filters out invalid URLs and cases where the check misses invalid URLs. Mark
+ the latter ("url check misses invalid cases") with 'test_expect_failure' to
+ indicate that this not the undesired behavior.
Signed-off-by: Victoria Dye <vdye@github.com>
@@ t/helper/test-submodule.c: static const char *submodule_check_name_usage[] = {
};
+#define TEST_TOOL_CHECK_URL_USAGE \
-+ "test-tool submodule check-url <url>"
++ "test-tool submodule check-url"
+static const char *submodule_check_url_usage[] = {
+ TEST_TOOL_CHECK_URL_USAGE,
+ NULL
@@ t/helper/test-submodule.c: static const char *submodule_check_name_usage[] = {
#define TEST_TOOL_IS_ACTIVE_USAGE \
"test-tool submodule is-active <name>"
static const char *submodule_is_active_usage[] = {
-@@ t/helper/test-submodule.c: static const char *submodule_usage[] = {
+@@ t/helper/test-submodule.c: static const char *submodule_resolve_relative_url_usage[] = {
+
+ static const char *submodule_usage[] = {
+ TEST_TOOL_CHECK_NAME_USAGE,
++ TEST_TOOL_CHECK_URL_USAGE,
+ TEST_TOOL_IS_ACTIVE_USAGE,
+ TEST_TOOL_RESOLVE_RELATIVE_URL_USAGE,
NULL
};
+-/* Filter stdin to print only valid names. */
+-static int check_name(void)
+typedef int (*check_fn_t)(const char *);
+
- /*
- * Exit non-zero if any of the submodule names given on the command line is
- * invalid. If no names are given, filter stdin to print only valid names
- * (which is primarily intended for testing).
- */
--static int check_name(int argc, const char **argv)
-+static int check_submodule(int argc, const char **argv, check_fn_t check_fn)
++/*
++ * Apply 'check_fn' to each line of stdin, printing values that pass the check
++ * to stdout.
++ */
++static int check_submodule(check_fn_t check_fn)
{
- if (argc > 1) {
- while (*++argv) {
-- if (check_submodule_name(*argv) < 0)
-+ if (check_fn(*argv) < 0)
- return 1;
- }
- } else {
- struct strbuf buf = STRBUF_INIT;
- while (strbuf_getline(&buf, stdin) != EOF) {
-- if (!check_submodule_name(buf.buf))
-+ if (!check_fn(buf.buf))
- printf("%s\n", buf.buf);
- }
- strbuf_release(&buf);
+ struct strbuf buf = STRBUF_INIT;
+ while (strbuf_getline(&buf, stdin) != EOF) {
+- if (!check_submodule_name(buf.buf))
++ if (!check_fn(buf.buf))
+ printf("%s\n", buf.buf);
+ }
+ strbuf_release(&buf);
@@ t/helper/test-submodule.c: static int cmd__submodule_check_name(int argc, const char **argv)
if (argc)
usage_with_options(submodule_check_name_usage, options);
-- return check_name(argc, argv);
-+ return check_submodule(argc, argv, check_submodule_name);
+- return check_name();
++ return check_submodule(check_submodule_name);
+}
+
+static int cmd__submodule_check_url(int argc, const char **argv)
@@ t/helper/test-submodule.c: static int cmd__submodule_check_name(int argc, const
+ if (argc)
+ usage_with_options(submodule_check_url_usage, options);
+
-+ return check_submodule(argc, argv, check_submodule_url);
++ return check_submodule(check_submodule_url);
}
static int cmd__submodule_is_active(int argc, const char **argv)
@@ t/t7450-bad-git-dotfiles.sh: test_expect_success 'check names' '
test_cmp expect actual
'
-+test_expect_failure 'check urls' '
++test_expect_success 'check urls' '
+ cat >expect <<-\EOF &&
+ ./bar/baz/foo.git
+ https://example.com/foo.git
@@ t/t7450-bad-git-dotfiles.sh: test_expect_success 'check names' '
+ ./%0ahost=example.com/foo.git
+ https://one.example.com/evil?%0ahost=two.example.com
+ https:///example.com/foo.git
-+ http://example.com:test/foo.git
+ https::example.com/foo.git
+ http:::example.com/foo.git
+ EOF
+
+ test_cmp expect actual
+'
++
++# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if
++# a user attempts to clone it), but the fsck check passes.
++test_expect_failure 'url check misses invalid cases' '
++ test-tool submodule check-url >actual <<-\EOF &&
++ http://example.com:test/foo.git
++ EOF
++
++ test_must_be_empty actual
++'
+
test_expect_success 'create innocent subrepo' '
git init innocent &&
3: 893071530d3 ! 4: b79b1a71780 submodule-config.c: strengthen URL fsck check
@@ submodule-config.c: int check_submodule_url(const char *url)
## t/t7450-bad-git-dotfiles.sh ##
-@@ t/t7450-bad-git-dotfiles.sh: test_expect_success 'check names' '
+@@ t/t7450-bad-git-dotfiles.sh: test_expect_success 'check urls' '
+ ./%0ahost=example.com/foo.git
+ https://one.example.com/evil?%0ahost=two.example.com
+ https:///example.com/foo.git
++ http://example.com:test/foo.git
+ https::example.com/foo.git
+ http:::example.com/foo.git
+ EOF
+@@ t/t7450-bad-git-dotfiles.sh: test_expect_success 'check urls' '
test_cmp expect actual
'
--test_expect_failure 'check urls' '
-+test_expect_success 'check urls' '
- cat >expect <<-\EOF &&
- ./bar/baz/foo.git
- https://example.com/foo.git
+-# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if
+-# a user attempts to clone it), but the fsck check passes.
+-test_expect_failure 'url check misses invalid cases' '
+- test-tool submodule check-url >actual <<-\EOF &&
+- http://example.com:test/foo.git
+- EOF
+-
+- test_must_be_empty actual
+-'
+-
+ test_expect_success 'create innocent subrepo' '
+ git init innocent &&
+ git -C innocent commit --allow-empty -m foo
--
gitgitgadget
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 1/4] submodule-config.h: move check_submodule_url
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
@ 2024-01-18 1:55 ` Victoria Dye via GitGitGadget
2024-01-18 1:55 ` [PATCH v2 2/4] test-submodule: remove command line handling for check-name Victoria Dye via GitGitGadget
` (3 subsequent siblings)
4 siblings, 0 replies; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-18 1:55 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Jeff King, Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
Move 'check_submodule_url' out of 'fsck.c' and into 'submodule-config.h' as
a public method, similar to 'check_submodule_name'. With the function now
accessible outside of 'fsck', it can be used in a later commit to extend
'test-tool submodule' to check the validity of submodule URLs as it does
with names in the 'check-name' subcommand.
Other than its location, no changes are made to 'check_submodule_url' in
this patch.
Signed-off-by: Victoria Dye <vdye@github.com>
---
fsck.c | 133 --------------------------------------------
submodule-config.c | 134 +++++++++++++++++++++++++++++++++++++++++++++
submodule-config.h | 3 +
3 files changed, 137 insertions(+), 133 deletions(-)
diff --git a/fsck.c b/fsck.c
index 1ad02fcdfab..8ded0a473a4 100644
--- a/fsck.c
+++ b/fsck.c
@@ -20,7 +20,6 @@
#include "packfile.h"
#include "submodule-config.h"
#include "config.h"
-#include "credential.h"
#include "help.h"
static ssize_t max_tree_entry_len = 4096;
@@ -1047,138 +1046,6 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
return ret;
}
-static int starts_with_dot_slash(const char *const path)
-{
- return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
- PATH_MATCH_XPLATFORM);
-}
-
-static int starts_with_dot_dot_slash(const char *const path)
-{
- return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
- PATH_MATCH_XPLATFORM);
-}
-
-static int submodule_url_is_relative(const char *url)
-{
- return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
-}
-
-/*
- * Count directory components that a relative submodule URL should chop
- * from the remote_url it is to be resolved against.
- *
- * In other words, this counts "../" components at the start of a
- * submodule URL.
- *
- * Returns the number of directory components to chop and writes a
- * pointer to the next character of url after all leading "./" and
- * "../" components to out.
- */
-static int count_leading_dotdots(const char *url, const char **out)
-{
- int result = 0;
- while (1) {
- if (starts_with_dot_dot_slash(url)) {
- result++;
- url += strlen("../");
- continue;
- }
- if (starts_with_dot_slash(url)) {
- url += strlen("./");
- continue;
- }
- *out = url;
- return result;
- }
-}
-/*
- * Check whether a transport is implemented by git-remote-curl.
- *
- * If it is, returns 1 and writes the URL that would be passed to
- * git-remote-curl to the "out" parameter.
- *
- * Otherwise, returns 0 and leaves "out" untouched.
- *
- * Examples:
- * http::https://example.com/repo.git -> 1, https://example.com/repo.git
- * https://example.com/repo.git -> 1, https://example.com/repo.git
- * git://example.com/repo.git -> 0
- *
- * This is for use in checking for previously exploitable bugs that
- * required a submodule URL to be passed to git-remote-curl.
- */
-static int url_to_curl_url(const char *url, const char **out)
-{
- /*
- * We don't need to check for case-aliases, "http.exe", and so
- * on because in the default configuration, is_transport_allowed
- * prevents URLs with those schemes from being cloned
- * automatically.
- */
- if (skip_prefix(url, "http::", out) ||
- skip_prefix(url, "https::", out) ||
- skip_prefix(url, "ftp::", out) ||
- skip_prefix(url, "ftps::", out))
- return 1;
- if (starts_with(url, "http://") ||
- starts_with(url, "https://") ||
- starts_with(url, "ftp://") ||
- starts_with(url, "ftps://")) {
- *out = url;
- return 1;
- }
- return 0;
-}
-
-static int check_submodule_url(const char *url)
-{
- const char *curl_url;
-
- if (looks_like_command_line_option(url))
- return -1;
-
- if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
- char *decoded;
- const char *next;
- int has_nl;
-
- /*
- * This could be appended to an http URL and url-decoded;
- * check for malicious characters.
- */
- decoded = url_decode(url);
- has_nl = !!strchr(decoded, '\n');
-
- free(decoded);
- if (has_nl)
- return -1;
-
- /*
- * URLs which escape their root via "../" can overwrite
- * the host field and previous components, resolving to
- * URLs like https::example.com/submodule.git and
- * https:///example.com/submodule.git that were
- * susceptible to CVE-2020-11008.
- */
- if (count_leading_dotdots(url, &next) > 0 &&
- (*next == ':' || *next == '/'))
- return -1;
- }
-
- else if (url_to_curl_url(url, &curl_url)) {
- struct credential c = CREDENTIAL_INIT;
- int ret = 0;
- if (credential_from_url_gently(&c, curl_url, 1) ||
- !*c.host)
- ret = -1;
- credential_clear(&c);
- return ret;
- }
-
- return 0;
-}
-
struct fsck_gitmodules_data {
const struct object_id *oid;
struct fsck_options *options;
diff --git a/submodule-config.c b/submodule-config.c
index f4dd482abc9..3b295e9f89c 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -14,6 +14,8 @@
#include "parse-options.h"
#include "thread-utils.h"
#include "tree-walk.h"
+#include "url.h"
+#include "credential.h"
/*
* submodule cache lookup structure
@@ -228,6 +230,138 @@ int check_submodule_name(const char *name)
return 0;
}
+static int starts_with_dot_slash(const char *const path)
+{
+ return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
+ PATH_MATCH_XPLATFORM);
+}
+
+static int starts_with_dot_dot_slash(const char *const path)
+{
+ return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
+ PATH_MATCH_XPLATFORM);
+}
+
+static int submodule_url_is_relative(const char *url)
+{
+ return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
+}
+
+/*
+ * Count directory components that a relative submodule URL should chop
+ * from the remote_url it is to be resolved against.
+ *
+ * In other words, this counts "../" components at the start of a
+ * submodule URL.
+ *
+ * Returns the number of directory components to chop and writes a
+ * pointer to the next character of url after all leading "./" and
+ * "../" components to out.
+ */
+static int count_leading_dotdots(const char *url, const char **out)
+{
+ int result = 0;
+ while (1) {
+ if (starts_with_dot_dot_slash(url)) {
+ result++;
+ url += strlen("../");
+ continue;
+ }
+ if (starts_with_dot_slash(url)) {
+ url += strlen("./");
+ continue;
+ }
+ *out = url;
+ return result;
+ }
+}
+/*
+ * Check whether a transport is implemented by git-remote-curl.
+ *
+ * If it is, returns 1 and writes the URL that would be passed to
+ * git-remote-curl to the "out" parameter.
+ *
+ * Otherwise, returns 0 and leaves "out" untouched.
+ *
+ * Examples:
+ * http::https://example.com/repo.git -> 1, https://example.com/repo.git
+ * https://example.com/repo.git -> 1, https://example.com/repo.git
+ * git://example.com/repo.git -> 0
+ *
+ * This is for use in checking for previously exploitable bugs that
+ * required a submodule URL to be passed to git-remote-curl.
+ */
+static int url_to_curl_url(const char *url, const char **out)
+{
+ /*
+ * We don't need to check for case-aliases, "http.exe", and so
+ * on because in the default configuration, is_transport_allowed
+ * prevents URLs with those schemes from being cloned
+ * automatically.
+ */
+ if (skip_prefix(url, "http::", out) ||
+ skip_prefix(url, "https::", out) ||
+ skip_prefix(url, "ftp::", out) ||
+ skip_prefix(url, "ftps::", out))
+ return 1;
+ if (starts_with(url, "http://") ||
+ starts_with(url, "https://") ||
+ starts_with(url, "ftp://") ||
+ starts_with(url, "ftps://")) {
+ *out = url;
+ return 1;
+ }
+ return 0;
+}
+
+int check_submodule_url(const char *url)
+{
+ const char *curl_url;
+
+ if (looks_like_command_line_option(url))
+ return -1;
+
+ if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
+ char *decoded;
+ const char *next;
+ int has_nl;
+
+ /*
+ * This could be appended to an http URL and url-decoded;
+ * check for malicious characters.
+ */
+ decoded = url_decode(url);
+ has_nl = !!strchr(decoded, '\n');
+
+ free(decoded);
+ if (has_nl)
+ return -1;
+
+ /*
+ * URLs which escape their root via "../" can overwrite
+ * the host field and previous components, resolving to
+ * URLs like https::example.com/submodule.git and
+ * https:///example.com/submodule.git that were
+ * susceptible to CVE-2020-11008.
+ */
+ if (count_leading_dotdots(url, &next) > 0 &&
+ (*next == ':' || *next == '/'))
+ return -1;
+ }
+
+ else if (url_to_curl_url(url, &curl_url)) {
+ struct credential c = CREDENTIAL_INIT;
+ int ret = 0;
+ if (credential_from_url_gently(&c, curl_url, 1) ||
+ !*c.host)
+ ret = -1;
+ credential_clear(&c);
+ return ret;
+ }
+
+ return 0;
+}
+
static int name_and_item_from_var(const char *var, struct strbuf *name,
struct strbuf *item)
{
diff --git a/submodule-config.h b/submodule-config.h
index 958f320ac6c..b6133af71b0 100644
--- a/submodule-config.h
+++ b/submodule-config.h
@@ -89,6 +89,9 @@ int config_set_in_gitmodules_file_gently(const char *key, const char *value);
*/
int check_submodule_name(const char *name);
+/* Returns 0 if the URL valid per RFC3986 and -1 otherwise. */
+int check_submodule_url(const char *url);
+
/*
* Note: these helper functions exist solely to maintain backward
* compatibility with 'fetch' and 'update_clone' storing configuration in
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v2 2/4] test-submodule: remove command line handling for check-name
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
2024-01-18 1:55 ` [PATCH v2 1/4] submodule-config.h: move check_submodule_url Victoria Dye via GitGitGadget
@ 2024-01-18 1:55 ` Victoria Dye via GitGitGadget
2024-01-18 20:44 ` Junio C Hamano
2024-01-18 1:55 ` [PATCH v2 3/4] t7450: test submodule urls Victoria Dye via GitGitGadget
` (2 subsequent siblings)
4 siblings, 1 reply; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-18 1:55 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Jeff King, Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
The 'check-name' subcommand to 'test-tool submodule' is documented as being
able to take a command line argument '<name>'. However, this does not work -
and has never worked - because 'argc > 0' triggers the usage message in
'cmd__submodule_check_name()'. To simplify the helper and avoid future
confusion around proper use of the subcommand, remove any references to
command line arguments for 'check-name' in usage strings and handling in
'check_name()'.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Victoria Dye <vdye@github.com>
---
t/helper/test-submodule.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c
index 50c154d0370..9adbc8d1568 100644
--- a/t/helper/test-submodule.c
+++ b/t/helper/test-submodule.c
@@ -9,7 +9,7 @@
#include "submodule.h"
#define TEST_TOOL_CHECK_NAME_USAGE \
- "test-tool submodule check-name <name>"
+ "test-tool submodule check-name"
static const char *submodule_check_name_usage[] = {
TEST_TOOL_CHECK_NAME_USAGE,
NULL
@@ -36,26 +36,15 @@ static const char *submodule_usage[] = {
NULL
};
-/*
- * Exit non-zero if any of the submodule names given on the command line is
- * invalid. If no names are given, filter stdin to print only valid names
- * (which is primarily intended for testing).
- */
-static int check_name(int argc, const char **argv)
+/* Filter stdin to print only valid names. */
+static int check_name(void)
{
- if (argc > 1) {
- while (*++argv) {
- if (check_submodule_name(*argv) < 0)
- return 1;
- }
- } else {
- struct strbuf buf = STRBUF_INIT;
- while (strbuf_getline(&buf, stdin) != EOF) {
- if (!check_submodule_name(buf.buf))
- printf("%s\n", buf.buf);
- }
- strbuf_release(&buf);
+ struct strbuf buf = STRBUF_INIT;
+ while (strbuf_getline(&buf, stdin) != EOF) {
+ if (!check_submodule_name(buf.buf))
+ printf("%s\n", buf.buf);
}
+ strbuf_release(&buf);
return 0;
}
@@ -69,7 +58,7 @@ static int cmd__submodule_check_name(int argc, const char **argv)
if (argc)
usage_with_options(submodule_check_name_usage, options);
- return check_name(argc, argv);
+ return check_name();
}
static int cmd__submodule_is_active(int argc, const char **argv)
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v2 2/4] test-submodule: remove command line handling for check-name
2024-01-18 1:55 ` [PATCH v2 2/4] test-submodule: remove command line handling for check-name Victoria Dye via GitGitGadget
@ 2024-01-18 20:44 ` Junio C Hamano
0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2024-01-18 20:44 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget
Cc: git, Patrick Steinhardt, Jeff King, Victoria Dye
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:
> From: Victoria Dye <vdye@github.com>
>
> The 'check-name' subcommand to 'test-tool submodule' is documented as being
> able to take a command line argument '<name>'. However, this does not work -
> and has never worked - because 'argc > 0' triggers the usage message in
> 'cmd__submodule_check_name()'. To simplify the helper and avoid future
> confusion around proper use of the subcommand, remove any references to
> command line arguments for 'check-name' in usage strings and handling in
> 'check_name()'.
>
> Helped-by: Jeff King <peff@peff.net>
> Signed-off-by: Victoria Dye <vdye@github.com>
> ---
> t/helper/test-submodule.c | 29 +++++++++--------------------
> 1 file changed, 9 insertions(+), 20 deletions(-)
Excellent, both of you.
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 3/4] t7450: test submodule urls
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
2024-01-18 1:55 ` [PATCH v2 1/4] submodule-config.h: move check_submodule_url Victoria Dye via GitGitGadget
2024-01-18 1:55 ` [PATCH v2 2/4] test-submodule: remove command line handling for check-name Victoria Dye via GitGitGadget
@ 2024-01-18 1:55 ` Victoria Dye via GitGitGadget
2024-01-19 6:05 ` Patrick Steinhardt
2024-01-18 1:55 ` [PATCH v2 4/4] submodule-config.c: strengthen URL fsck check Victoria Dye via GitGitGadget
2024-01-18 18:24 ` [PATCH v2 0/4] Strengthen fsck checks for submodule URLs Junio C Hamano
4 siblings, 1 reply; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-18 1:55 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Jeff King, Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
Add tests to 't7450-bad-git-dotfiles.sh' to check the validity of different
submodule URLs. To verify this directly (without setting up test
repositories & submodules), add a 'check-url' subcommand to 'test-tool
submodule' that calls 'check_submodule_url' in the same way that
'check-name' calls 'check_submodule_name'.
Add two tests to separately address cases where the URL check correctly
filters out invalid URLs and cases where the check misses invalid URLs. Mark
the latter ("url check misses invalid cases") with 'test_expect_failure' to
indicate that this not the undesired behavior.
Signed-off-by: Victoria Dye <vdye@github.com>
---
t/helper/test-submodule.c | 35 +++++++++++++++++++++++++++++++----
t/t7450-bad-git-dotfiles.sh | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c
index 9adbc8d1568..7197969a081 100644
--- a/t/helper/test-submodule.c
+++ b/t/helper/test-submodule.c
@@ -15,6 +15,13 @@ static const char *submodule_check_name_usage[] = {
NULL
};
+#define TEST_TOOL_CHECK_URL_USAGE \
+ "test-tool submodule check-url"
+static const char *submodule_check_url_usage[] = {
+ TEST_TOOL_CHECK_URL_USAGE,
+ NULL
+};
+
#define TEST_TOOL_IS_ACTIVE_USAGE \
"test-tool submodule is-active <name>"
static const char *submodule_is_active_usage[] = {
@@ -31,17 +38,23 @@ static const char *submodule_resolve_relative_url_usage[] = {
static const char *submodule_usage[] = {
TEST_TOOL_CHECK_NAME_USAGE,
+ TEST_TOOL_CHECK_URL_USAGE,
TEST_TOOL_IS_ACTIVE_USAGE,
TEST_TOOL_RESOLVE_RELATIVE_URL_USAGE,
NULL
};
-/* Filter stdin to print only valid names. */
-static int check_name(void)
+typedef int (*check_fn_t)(const char *);
+
+/*
+ * Apply 'check_fn' to each line of stdin, printing values that pass the check
+ * to stdout.
+ */
+static int check_submodule(check_fn_t check_fn)
{
struct strbuf buf = STRBUF_INIT;
while (strbuf_getline(&buf, stdin) != EOF) {
- if (!check_submodule_name(buf.buf))
+ if (!check_fn(buf.buf))
printf("%s\n", buf.buf);
}
strbuf_release(&buf);
@@ -58,7 +71,20 @@ static int cmd__submodule_check_name(int argc, const char **argv)
if (argc)
usage_with_options(submodule_check_name_usage, options);
- return check_name();
+ return check_submodule(check_submodule_name);
+}
+
+static int cmd__submodule_check_url(int argc, const char **argv)
+{
+ struct option options[] = {
+ OPT_END()
+ };
+ argc = parse_options(argc, argv, "test-tools", options,
+ submodule_check_url_usage, 0);
+ if (argc)
+ usage_with_options(submodule_check_url_usage, options);
+
+ return check_submodule(check_submodule_url);
}
static int cmd__submodule_is_active(int argc, const char **argv)
@@ -184,6 +210,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED)
static struct test_cmd cmds[] = {
{ "check-name", cmd__submodule_check_name },
+ { "check-url", cmd__submodule_check_url },
{ "is-active", cmd__submodule_is_active },
{ "resolve-relative-url", cmd__submodule_resolve_relative_url},
{ "config-list", cmd__submodule_config_list },
diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh
index 35a31acd4d7..c73b1c92ecc 100755
--- a/t/t7450-bad-git-dotfiles.sh
+++ b/t/t7450-bad-git-dotfiles.sh
@@ -45,6 +45,41 @@ test_expect_success 'check names' '
test_cmp expect actual
'
+test_expect_success 'check urls' '
+ cat >expect <<-\EOF &&
+ ./bar/baz/foo.git
+ https://example.com/foo.git
+ http://example.com:80/deeper/foo.git
+ EOF
+
+ test-tool submodule check-url >actual <<-\EOF &&
+ ./bar/baz/foo.git
+ https://example.com/foo.git
+ http://example.com:80/deeper/foo.git
+ -a./foo
+ ../../..//test/foo.git
+ ../../../../../:localhost:8080/foo.git
+ ..\../.\../:example.com/foo.git
+ ./%0ahost=example.com/foo.git
+ https://one.example.com/evil?%0ahost=two.example.com
+ https:///example.com/foo.git
+ https::example.com/foo.git
+ http:::example.com/foo.git
+ EOF
+
+ test_cmp expect actual
+'
+
+# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if
+# a user attempts to clone it), but the fsck check passes.
+test_expect_failure 'url check misses invalid cases' '
+ test-tool submodule check-url >actual <<-\EOF &&
+ http://example.com:test/foo.git
+ EOF
+
+ test_must_be_empty actual
+'
+
test_expect_success 'create innocent subrepo' '
git init innocent &&
git -C innocent commit --allow-empty -m foo
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v2 3/4] t7450: test submodule urls
2024-01-18 1:55 ` [PATCH v2 3/4] t7450: test submodule urls Victoria Dye via GitGitGadget
@ 2024-01-19 6:05 ` Patrick Steinhardt
2024-01-19 18:16 ` Junio C Hamano
0 siblings, 1 reply; 31+ messages in thread
From: Patrick Steinhardt @ 2024-01-19 6:05 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget; +Cc: git, Jeff King, Victoria Dye
[-- Attachment #1: Type: text/plain, Size: 960 bytes --]
On Thu, Jan 18, 2024 at 01:55:17AM +0000, Victoria Dye via GitGitGadget wrote:
> From: Victoria Dye <vdye@github.com>
>
> Add tests to 't7450-bad-git-dotfiles.sh' to check the validity of different
> submodule URLs. To verify this directly (without setting up test
> repositories & submodules), add a 'check-url' subcommand to 'test-tool
> submodule' that calls 'check_submodule_url' in the same way that
> 'check-name' calls 'check_submodule_name'.
>
> Add two tests to separately address cases where the URL check correctly
> filters out invalid URLs and cases where the check misses invalid URLs. Mark
> the latter ("url check misses invalid cases") with 'test_expect_failure' to
> indicate that this not the undesired behavior.
Nit: this should probably say "to indicate that this is not the desired
behaviour." But given that the other patches in this series look good to
me I don't think this warrants a reroll.
Thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 3/4] t7450: test submodule urls
2024-01-19 6:05 ` Patrick Steinhardt
@ 2024-01-19 18:16 ` Junio C Hamano
0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2024-01-19 18:16 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: Victoria Dye via GitGitGadget, git, Jeff King, Victoria Dye
Patrick Steinhardt <ps@pks.im> writes:
>> Add two tests to separately address cases where the URL check correctly
>> filters out invalid URLs and cases where the check misses invalid URLs. Mark
>> the latter ("url check misses invalid cases") with 'test_expect_failure' to
>> indicate that this not the undesired behavior.
>
> Nit: this should probably say "to indicate that this is not the desired
> behaviour." But given that the other patches in this series look good to
> me I don't think this warrants a reroll.
Good eyes.
I'll rewrite that part to "... to indicate that this is currently
broken, which will be fixed in the next step." before merging the
series to 'next'.
Thanks.
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v2 4/4] submodule-config.c: strengthen URL fsck check
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
` (2 preceding siblings ...)
2024-01-18 1:55 ` [PATCH v2 3/4] t7450: test submodule urls Victoria Dye via GitGitGadget
@ 2024-01-18 1:55 ` Victoria Dye via GitGitGadget
2024-01-18 18:24 ` [PATCH v2 0/4] Strengthen fsck checks for submodule URLs Junio C Hamano
4 siblings, 0 replies; 31+ messages in thread
From: Victoria Dye via GitGitGadget @ 2024-01-18 1:55 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt, Jeff King, Victoria Dye, Victoria Dye
From: Victoria Dye <vdye@github.com>
Update the validation of "curl URL" submodule URLs (i.e. those that specify
an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more
invalid URLs. The existing validation using 'credential_from_url_gently()'
parses certain URLs incorrectly, leading to invalid submodule URLs passing
'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote
URLs in 'remote_get()' - correctly identifies the invalid URLs missed by
'credential_from_url_gently()'.
To catch more invalid cases, replace 'credential_from_url_gently()' with
'url_normalize()' followed by a 'url_decode()' and a check for newlines
(mirroring 'check_url_component()' in the 'credential_from_url_gently()'
validation).
Signed-off-by: Victoria Dye <vdye@github.com>
---
submodule-config.c | 16 +++++++++++-----
t/t7450-bad-git-dotfiles.sh | 11 +----------
2 files changed, 12 insertions(+), 15 deletions(-)
diff --git a/submodule-config.c b/submodule-config.c
index 3b295e9f89c..54130f6a385 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -15,7 +15,7 @@
#include "thread-utils.h"
#include "tree-walk.h"
#include "url.h"
-#include "credential.h"
+#include "urlmatch.h"
/*
* submodule cache lookup structure
@@ -350,12 +350,18 @@ int check_submodule_url(const char *url)
}
else if (url_to_curl_url(url, &curl_url)) {
- struct credential c = CREDENTIAL_INIT;
int ret = 0;
- if (credential_from_url_gently(&c, curl_url, 1) ||
- !*c.host)
+ char *normalized = url_normalize(curl_url, NULL);
+ if (normalized) {
+ char *decoded = url_decode(normalized);
+ if (strchr(decoded, '\n'))
+ ret = -1;
+ free(normalized);
+ free(decoded);
+ } else {
ret = -1;
- credential_clear(&c);
+ }
+
return ret;
}
diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh
index c73b1c92ecc..46d4fb0354b 100755
--- a/t/t7450-bad-git-dotfiles.sh
+++ b/t/t7450-bad-git-dotfiles.sh
@@ -63,6 +63,7 @@ test_expect_success 'check urls' '
./%0ahost=example.com/foo.git
https://one.example.com/evil?%0ahost=two.example.com
https:///example.com/foo.git
+ http://example.com:test/foo.git
https::example.com/foo.git
http:::example.com/foo.git
EOF
@@ -70,16 +71,6 @@ test_expect_success 'check urls' '
test_cmp expect actual
'
-# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if
-# a user attempts to clone it), but the fsck check passes.
-test_expect_failure 'url check misses invalid cases' '
- test-tool submodule check-url >actual <<-\EOF &&
- http://example.com:test/foo.git
- EOF
-
- test_must_be_empty actual
-'
-
test_expect_success 'create innocent subrepo' '
git init innocent &&
git -C innocent commit --allow-empty -m foo
--
gitgitgadget
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v2 0/4] Strengthen fsck checks for submodule URLs
2024-01-18 1:55 ` [PATCH v2 0/4] " Victoria Dye via GitGitGadget
` (3 preceding siblings ...)
2024-01-18 1:55 ` [PATCH v2 4/4] submodule-config.c: strengthen URL fsck check Victoria Dye via GitGitGadget
@ 2024-01-18 18:24 ` Junio C Hamano
2024-01-20 0:51 ` Jeff King
4 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2024-01-18 18:24 UTC (permalink / raw)
To: Victoria Dye via GitGitGadget
Cc: git, Patrick Steinhardt, Jeff King, Victoria Dye
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:
> While testing 'git fsck' checks on .gitmodules URLs, I noticed that some
> invalid URLs were passing the checks. Digging into it a bit more, the issue
> turned out to be that 'credential_from_url_gently()' parses certain URLs
> (like "http://example.com:something/deeper/path") incorrectly, in a way that
> appeared to return a valid result.
>
> Fortunately, these URLs are rejected in fetches/clones/pushes anyway because
> 'url_normalize()' (called in 'validate_remote_url()') correctly identifies
> them as invalid. So, to bring 'git fsck' in line with other (stronger)
> validation done on remote URLs, this series replaces the
> 'credential_from_url_gently()' check with one that uses 'url_normalize()'.
>
> * Patch 1 moves 'check_submodule_url()' to a public location so that it can
> be used outside of 'fsck.c'.
> * Patch 2 removes the obsolete/never-used code in 'test-tool submodule
> check-name' handling names provided on the command line.
> * Patch 3 adds a 'check-url' mode to 'test-tool submodule', calling the
> now-public 'check_submodule_url()' method on a given URL, and adds new
> tests checking valid and invalid submodule URLs.
> * Patch 4 replaces the 'credential_from_url_gently()' check with
> 'url_normalize()' followed by 'url_decode()' and an explicit check for
> newlines (to preserve the newline handling added in 07259e74ec1 (fsck:
> detect gitmodules URLs with embedded newlines, 2020-03-11)).
Nicely done. I'll wait for a few days to see if anybody else has
reaction but after reading the patches myself, my inclination is to
suggest merging it to 'next'.
Thanks.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v2 0/4] Strengthen fsck checks for submodule URLs
2024-01-18 18:24 ` [PATCH v2 0/4] Strengthen fsck checks for submodule URLs Junio C Hamano
@ 2024-01-20 0:51 ` Jeff King
0 siblings, 0 replies; 31+ messages in thread
From: Jeff King @ 2024-01-20 0:51 UTC (permalink / raw)
To: Junio C Hamano
Cc: Victoria Dye via GitGitGadget, git, Patrick Steinhardt,
Victoria Dye
On Thu, Jan 18, 2024 at 10:24:51AM -0800, Junio C Hamano wrote:
> > * Patch 1 moves 'check_submodule_url()' to a public location so that it can
> > be used outside of 'fsck.c'.
> > * Patch 2 removes the obsolete/never-used code in 'test-tool submodule
> > check-name' handling names provided on the command line.
> > * Patch 3 adds a 'check-url' mode to 'test-tool submodule', calling the
> > now-public 'check_submodule_url()' method on a given URL, and adds new
> > tests checking valid and invalid submodule URLs.
> > * Patch 4 replaces the 'credential_from_url_gently()' check with
> > 'url_normalize()' followed by 'url_decode()' and an explicit check for
> > newlines (to preserve the newline handling added in 07259e74ec1 (fsck:
> > detect gitmodules URLs with embedded newlines, 2020-03-11)).
>
> Nicely done. I'll wait for a few days to see if anybody else has
> reaction but after reading the patches myself, my inclination is to
> suggest merging it to 'next'.
It all looks good to me to go to 'next'.
After simplifying the input handling in patch 2, I probably would not
have bothered with the abstracted interface in patch 3 (and instead just
repeated the few lines of boilerplate, since there's so much already).
Mostly just because function pointers in C often make reading and
debugging more annoying. But I don't think it's a very big deal either
way in this instance.
-Peff
^ permalink raw reply [flat|nested] 31+ messages in thread