Git development
 help / color / mirror / Atom feed
From: "Matheus Afonso Martins Moreira via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: "Torsten Bögershausen" <tboegi@web.de>,
	"Ghanshyam Thakkar" <shyamthakkar001@gmail.com>,
	"Matheus Moreira" <matheus@matheusmoreira.com>,
	"Matheus Afonso Martins Moreira" <matheus@matheusmoreira.com>
Subject: [PATCH v2 6/8] builtin: create url-parse command
Date: Fri, 01 May 2026 23:15:08 +0000	[thread overview]
Message-ID: <886a7d659ee3b018290ed24ad16d381525436b81.1777677310.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1715.v2.git.git.1777677310.gitgitgadget@gmail.com>

From: Matheus Afonso Martins Moreira <matheus@matheusmoreira.com>

Git commands can accept a rather wide variety of URL syntaxes.
The range of accepted inputs might expand even more in the future.
This makes the parsing of URL components difficult since standard URL
parsers cannot be used. Extracting the components of a git URL would
require implementing all the schemes that git itself supports, not to
mention tracking its development continuously in case new URL schemes
are added.

The url-parse builtin command is designed to solve this problem
by exposing git's native URL parsing facilities as a plumbing command.
Other programs can then call upon git itself to parse the git URLs
and extract their components. This should be quite useful for scripts.

Signed-off-by: Matheus Afonso Martins Moreira <matheus@matheusmoreira.com>
---
 .gitignore          |   1 +
 Makefile            |   1 +
 builtin.h           |   1 +
 builtin/url-parse.c | 132 ++++++++++++++++++++++++++++++++++++++++++++
 command-list.txt    |   1 +
 git.c               |   1 +
 meson.build         |   1 +
 7 files changed, 138 insertions(+)
 create mode 100644 builtin/url-parse.c

diff --git a/.gitignore b/.gitignore
index 24635cf2d6..c5673daa6e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -182,6 +182,7 @@
 /git-update-server-info
 /git-upload-archive
 /git-upload-pack
+/git-url-parse
 /git-var
 /git-verify-commit
 /git-verify-pack
diff --git a/Makefile b/Makefile
index cedc234173..1c757a1aa0 100644
--- a/Makefile
+++ b/Makefile
@@ -1497,6 +1497,7 @@ BUILTIN_OBJS += builtin/update-ref.o
 BUILTIN_OBJS += builtin/update-server-info.o
 BUILTIN_OBJS += builtin/upload-archive.o
 BUILTIN_OBJS += builtin/upload-pack.o
+BUILTIN_OBJS += builtin/url-parse.o
 BUILTIN_OBJS += builtin/var.o
 BUILTIN_OBJS += builtin/verify-commit.o
 BUILTIN_OBJS += builtin/verify-pack.o
diff --git a/builtin.h b/builtin.h
index 235c51f30e..c6f7672991 100644
--- a/builtin.h
+++ b/builtin.h
@@ -271,6 +271,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix, stru
 int cmd_upload_archive(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_upload_pack(int argc, const char **argv, const char *prefix, struct repository *repo);
+int cmd_url_parse(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_var(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_verify_commit(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_verify_tag(int argc, const char **argv, const char *prefix, struct repository *repo);
diff --git a/builtin/url-parse.c b/builtin/url-parse.c
new file mode 100644
index 0000000000..6c70c131e1
--- /dev/null
+++ b/builtin/url-parse.c
@@ -0,0 +1,132 @@
+#include "builtin.h"
+#include "gettext.h"
+#include "parse-options.h"
+#include "url.h"
+#include "urlmatch.h"
+
+static const char * const builtin_url_parse_usage[] = {
+	N_("git url-parse [-c <component>] [--] <url>..."),
+	NULL
+};
+
+static char *component_arg;
+
+static struct option builtin_url_parse_options[] = {
+	OPT_STRING('c', "component", &component_arg, N_("component"),
+		N_("which URL component to extract")),
+	OPT_END(),
+};
+
+enum url_component {
+	URL_NONE = 0,
+	URL_SCHEME,
+	URL_USER,
+	URL_PASSWORD,
+	URL_HOST,
+	URL_PORT,
+	URL_PATH,
+};
+
+static void parse_or_die(const char *url, struct url_info *info)
+{
+	if (url_is_local_not_ssh(url)) {
+		if (*url == '/')
+			die("'%s' is not a URL; if you meant a local "
+			    "repository, use 'file://%s'", url, url);
+		die("'%s' is not a URL; if you meant a local repository, "
+		    "use a 'file://' URL with an absolute path", url);
+	}
+	if (!url_parse(url, info))
+		die("invalid git URL '%s': %s", url, info->err);
+}
+
+static enum url_component get_component_or_die(const char *arg)
+{
+	if (!strcmp("path", arg))
+		return URL_PATH;
+	if (!strcmp("host", arg))
+		return URL_HOST;
+	if (!strcmp("scheme", arg))
+		return URL_SCHEME;
+	if (!strcmp("user", arg))
+		return URL_USER;
+	if (!strcmp("password", arg))
+		return URL_PASSWORD;
+	if (!strcmp("port", arg))
+		return URL_PORT;
+	die("invalid git URL component '%s'", arg);
+}
+
+static char *extract_component(enum url_component component,
+			       struct url_info *info)
+{
+	size_t offset, length;
+
+	switch (component) {
+	case URL_SCHEME:
+		offset = 0;
+		length = info->scheme_len;
+		break;
+	case URL_USER:
+		offset = info->user_off;
+		length = info->user_len;
+		break;
+	case URL_PASSWORD:
+		offset = info->passwd_off;
+		length = info->passwd_len;
+		break;
+	case URL_HOST:
+		offset = info->host_off;
+		length = info->host_len;
+		break;
+	case URL_PORT:
+		offset = info->port_off;
+		length = info->port_len;
+		break;
+	case URL_PATH:
+		offset = info->path_off;
+		length = info->path_len;
+		break;
+	case URL_NONE:
+		return NULL;
+	}
+
+	return xstrndup(info->url + offset, length);
+}
+
+int cmd_url_parse(int argc,
+		  const char **argv,
+		  const char *prefix,
+		  struct repository *repo UNUSED)
+{
+	struct url_info info;
+	enum url_component selected = URL_NONE;
+	char *extracted;
+	int i;
+
+	argc = parse_options(argc, argv, prefix, builtin_url_parse_options,
+			     builtin_url_parse_usage, 0);
+
+	if (argc == 0)
+		usage_with_options(builtin_url_parse_usage,
+				   builtin_url_parse_options);
+
+	if (component_arg)
+		selected = get_component_or_die(component_arg);
+
+	for (i = 0; i < argc; i++) {
+		parse_or_die(argv[i], &info);
+
+		if (selected != URL_NONE) {
+			extracted = extract_component(selected, &info);
+			if (extracted) {
+				puts(extracted);
+				free(extracted);
+			}
+		}
+
+		free(info.url);
+	}
+
+	return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index f9005cf459..1ede48186f 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -202,6 +202,7 @@ git-update-ref                          plumbingmanipulators
 git-update-server-info                  synchingrepositories
 git-upload-archive                      synchelpers
 git-upload-pack                         synchelpers
+git-url-parse                           purehelpers
 git-var                                 plumbinginterrogators
 git-verify-commit                       ancillaryinterrogators
 git-verify-pack                         plumbinginterrogators
diff --git a/git.c b/git.c
index 5a40eab8a2..a073eed931 100644
--- a/git.c
+++ b/git.c
@@ -670,6 +670,7 @@ static struct cmd_struct commands[] = {
 	{ "upload-archive", cmd_upload_archive, NO_PARSEOPT },
 	{ "upload-archive--writer", cmd_upload_archive_writer, NO_PARSEOPT },
 	{ "upload-pack", cmd_upload_pack },
+	{ "url-parse", cmd_url_parse },
 	{ "var", cmd_var, RUN_SETUP_GENTLY | NO_PARSEOPT },
 	{ "verify-commit", cmd_verify_commit, RUN_SETUP },
 	{ "verify-pack", cmd_verify_pack },
diff --git a/meson.build b/meson.build
index 11488623bf..dc3cf68ee5 100644
--- a/meson.build
+++ b/meson.build
@@ -686,6 +686,7 @@ builtin_sources = [
   'builtin/update-server-info.c',
   'builtin/upload-archive.c',
   'builtin/upload-pack.c',
+  'builtin/url-parse.c',
   'builtin/var.c',
   'builtin/verify-commit.c',
   'builtin/verify-pack.c',
-- 
gitgitgadget


  parent reply	other threads:[~2026-05-01 23:15 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-28 22:30 [PATCH 00/13] builtin: implement, document and test url-parse Matheus Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 01/13] url: move helper function to URL header and source Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 02/13] urlmatch: define url_parse function Matheus Afonso Martins Moreira via GitGitGadget
2024-05-01 22:18   ` Ghanshyam Thakkar
2024-05-02  4:02     ` Torsten Bögershausen
2024-04-28 22:30 ` [PATCH 03/13] builtin: create url-parse command Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 04/13] url-parse: add URL parsing helper function Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 05/13] url-parse: enumerate possible URL components Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 06/13] url-parse: define component extraction helper fn Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 07/13] url-parse: define string to component converter fn Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 08/13] url-parse: define usage and options Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 09/13] url-parse: parse options given on the command line Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 10/13] url-parse: validate all given git URLs Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:30 ` [PATCH 11/13] url-parse: output URL components selected by user Matheus Afonso Martins Moreira via GitGitGadget
2024-04-28 22:31 ` [PATCH 12/13] Documentation: describe the url-parse builtin Matheus Afonso Martins Moreira via GitGitGadget
2024-04-30  7:37   ` Ghanshyam Thakkar
2024-04-28 22:31 ` [PATCH 13/13] tests: add tests for the new " Matheus Afonso Martins Moreira via GitGitGadget
2024-04-29 20:53 ` [PATCH 00/13] builtin: implement, document and test url-parse Torsten Bögershausen
2024-04-29 22:04   ` Reply to community feedback Matheus Afonso Martins Moreira
2024-04-30  6:51     ` Torsten Bögershausen
2026-05-01 23:15 ` [PATCH v2 0/8] builtin: implement, document and test url-parse Matheus Moreira via GitGitGadget
2026-05-01 23:15   ` [PATCH v2 1/8] connect: rename enum protocol to url_scheme Matheus Afonso Martins Moreira via GitGitGadget
2026-05-01 23:15   ` [PATCH v2 2/8] url: move url_is_local_not_ssh to url.h Matheus Afonso Martins Moreira via GitGitGadget
2026-05-01 23:15   ` [PATCH v2 3/8] url: move scheme detection to URL header/source Matheus Afonso Martins Moreira via GitGitGadget
2026-05-01 23:15   ` [PATCH v2 4/8] url: return URL_SCHEME_UNKNOWN instead of dying Matheus Afonso Martins Moreira via GitGitGadget
2026-05-01 23:15   ` [PATCH v2 5/8] urlmatch: define url_parse function Matheus Afonso Martins Moreira via GitGitGadget
2026-05-01 23:15   ` Matheus Afonso Martins Moreira via GitGitGadget [this message]
2026-05-01 23:15   ` [PATCH v2 7/8] doc: describe the url-parse builtin Matheus Afonso Martins Moreira via GitGitGadget
2026-05-01 23:15   ` [PATCH v2 8/8] t9904: add tests for the new " Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28   ` [PATCH v3 0/8] builtin: implement, document and test url-parse Matheus Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 1/8] connect: rename enum protocol to url_scheme Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 2/8] url: move url_is_local_not_ssh to url.h Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 3/8] url: move scheme detection to URL header/source Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 4/8] url: return URL_SCHEME_UNKNOWN instead of dying Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 5/8] urlmatch: define url_parse function Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 6/8] builtin: create url-parse command Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 7/8] doc: describe the url-parse builtin Matheus Afonso Martins Moreira via GitGitGadget
2026-05-02  5:28     ` [PATCH v3 8/8] t9904: add tests for the new " Matheus Afonso Martins Moreira via GitGitGadget
2026-05-03  3:49     ` [PATCH v3 0/8] builtin: implement, document and test url-parse Junio C Hamano
2026-05-03  4:29       ` Matheus Afonso Martins Moreira
2026-05-03 17:28     ` Torsten Bögershausen
2026-05-03 19:36       ` Matheus Afonso Martins Moreira
2026-05-12  3:50         ` Junio C Hamano
2026-05-12  8:57           ` Torsten Bögershausen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=886a7d659ee3b018290ed24ad16d381525436b81.1777677310.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=matheus@matheusmoreira.com \
    --cc=shyamthakkar001@gmail.com \
    --cc=tboegi@web.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox