All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Ivan Frade via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Eric Sunshine" <sunshine@sunshineco.com>,
	"Ivan Frade" <ifrade@google.com>
Subject: [PATCH v5 0/2] fetch-pack: redact packfile urls in traces
Date: Thu, 28 Oct 2021 22:51:38 +0000	[thread overview]
Message-ID: <pull.1052.v5.git.1635461500.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1052.v4.git.1635288599.gitgitgadget@gmail.com>

Changes since v4:

 * Use "uri" instead of "url"
 * Look specifically for a line with packfile-uri format (instead of for a
   URL in general)
 * Limit the redacting to the packfile-uri section in do_fetch_pack_v2
 * Use "%.*s" instead of duplicating parts of the string to print

Changes since v3:

 * Enable redacting URLs for all sections
 * Redact only URL path (it was until the end of line)
 * Redact URL in die() with more friendly message
 * Update doc to mention that packfile URIs are also redacted.

Changes since v2:

 * Redact only the path of the URL
 * Test are now strict, validating the exact line expected in the log

Changes since v1:

 * Removed non-POSIX flags in tests
 * More accurate regex for the non-encrypted packfile line
 * Dropped documentation change
 * Dropped redacting the die message in http-fetch

Ivan Frade (2):
  fetch-pack: redact packfile urls in traces
  http-fetch: redact url on die() message

 Documentation/git.txt  |  5 +++--
 fetch-pack.c           |  4 ++++
 http-fetch.c           | 14 ++++++++++--
 pkt-line.c             | 39 +++++++++++++++++++++++++++++++-
 pkt-line.h             |  1 +
 t/t5702-protocol-v2.sh | 51 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 109 insertions(+), 5 deletions(-)


base-commit: e9e5ba39a78c8f5057262d49e261b42a8660d5b9
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1052%2Fifradeo%2Fredact-packfile-uri-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1052/ifradeo/redact-packfile-uri-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/1052

Range-diff vs v4:

 1:  973a250752c ! 1:  c95b3cafcd6 fetch-pack: redact packfile urls in traces
     @@ Documentation/git.txt: for full details.
      -	cookies, the "Authorization:" header, and the "Proxy-Authorization:"
      -	header. Set this variable to `0` to prevent this redaction.
      +	cookies, the "Authorization:" header, the "Proxy-Authorization:"
     -+	header and packfile URLs. Set this variable to `0` to prevent this
     ++	header and packfile URIs. Set this variable to `0` to prevent this
      +	redaction.
       
       `GIT_LITERAL_PATHSPECS`::
     @@ Documentation/git.txt: for full details.
      
       ## fetch-pack.c ##
      @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
     - 		reader.me = "fetch-pack";
     - 	}
     + 				receive_wanted_refs(&reader, sought, nr_sought);
       
     -+	if (git_env_bool("GIT_TRACE_REDACT", 1))
     -+		reader.options |= PACKET_READ_REDACT_URL_PATH;
     -+
     - 	while (state != FETCH_DONE) {
     - 		switch (state) {
     - 		case FETCH_CHECK_LOCAL:
     + 			/* get the pack(s) */
     ++			if (git_env_bool("GIT_TRACE_REDACT", 1))
     ++				reader.options |= PACKET_READ_REDACT_URI_PATH;
     + 			if (process_section_header(&reader, "packfile-uris", 1))
     + 				receive_packfile_uris(&reader, &packfile_uris);
     ++			reader.options &= ~PACKET_READ_REDACT_URI_PATH;
     ++
     + 			process_section_header(&reader, "packfile", 0);
     + 
     + 			/*
      
       ## pkt-line.c ##
      @@ pkt-line.c: int packet_length(const char lenbuf_hex[4])
       	return (val < 0) ? val : (val << 8) | hex2chr(lenbuf_hex + 2);
       }
       
     -+static char *find_url_path(const char* buffer, int *path_len)
     ++static char *find_packfile_uri_path(const char *buffer)
      +{
     -+	const char *URL_MARK = "://";
     -+	char *path = strstr(buffer, URL_MARK);
     -+	if (!path)
     -+		return NULL;
     ++	const char *URI_MARK = "://";
     ++	char *path;
     ++	int len;
      +
     -+	path += strlen(URL_MARK);
     -+	while (*path && *path != '/')
     -+		path++;
     ++	/* First char is sideband mark */
     ++	buffer += 1;
      +
     -+	if (!*path || !*(path + 1))
     -+		return NULL;
     ++	len = strspn(buffer, "0123456789abcdefABCDEF");
     ++	if (!(len == 40 || len == 64) || buffer[len] != ' ')
     ++		return NULL; /* required "<hash>SP" not seen */
      +
     -+	// position after '/'
     -+	path++;
     ++	path = strstr(buffer + len + 1, URI_MARK);
     ++	if (!path)
     ++		return NULL;
      +
     -+	if (path_len) {
     -+		char *url_end = strchrnul(path, ' ');
     -+		*path_len = url_end - path;
     -+	}
     ++	path = strchr(path + strlen(URI_MARK), '/');
     ++	if (!path || !*(path + 1))
     ++		return NULL;
      +
     -+	return path;
     ++	/* position after '/' */
     ++	return ++path;
      +}
      +
       enum packet_read_status packet_read_with_status(int fd, char **src_buffer,
     @@ pkt-line.c: enum packet_read_status packet_read_with_status(int fd, char **src_b
       {
       	int len;
       	char linelen[4];
     -+	char *url_path_start;
     -+	int url_path_len;
     ++	char *uri_path_start;
       
       	if (get_packet_data(fd, src_buffer, src_len, linelen, 4, options) < 0) {
       		*pktlen = -1;
     @@ pkt-line.c: enum packet_read_status packet_read_with_status(int fd, char **src_b
       
       	buffer[len] = 0;
      -	packet_trace(buffer, len, 0);
     -+	if (options & PACKET_READ_REDACT_URL_PATH &&
     -+	    (url_path_start = find_url_path(buffer, &url_path_len))) {
     ++	if (options & PACKET_READ_REDACT_URI_PATH &&
     ++	    (uri_path_start = find_packfile_uri_path(buffer))) {
      +		const char *redacted = "<redacted>";
      +		struct strbuf tracebuf = STRBUF_INIT;
      +		strbuf_insert(&tracebuf, 0, buffer, len);
     -+		strbuf_splice(&tracebuf, url_path_start - buffer,
     -+			      url_path_len, redacted, strlen(redacted));
     ++		strbuf_splice(&tracebuf, uri_path_start - buffer,
     ++			      strlen(uri_path_start), redacted, strlen(redacted));
      +		packet_trace(tracebuf.buf, tracebuf.len, 0);
      +		strbuf_release(&tracebuf);
      +	} else {
     @@ pkt-line.h: void packet_fflush(FILE *f);
       #define PACKET_READ_CHOMP_NEWLINE        (1u<<1)
       #define PACKET_READ_DIE_ON_ERR_PACKET    (1u<<2)
       #define PACKET_READ_GENTLE_ON_READ_ERROR (1u<<3)
     -+#define PACKET_READ_REDACT_URL_PATH      (1u<<4)
     ++#define PACKET_READ_REDACT_URI_PATH      (1u<<4)
       int packet_read(int fd, char *buffer, unsigned size, int options);
       
       /*
 2:  c7f0977cabd ! 2:  6912a690197 http-fetch: redact url on die() message
     @@ Commit message
          http-fetch: redact url on die() message
      
          http-fetch prints the URL after failing to fetch it. This can be
     -    confusing to users (they cannot really do anything with it) but even
     -    worse, they can share by accident a sensitive URL (e.g. with
     -    credentials) while looking for help.
     +    confusing to users (they cannot really do anything with it), and they
     +    can share by accident a sensitive URL (e.g. with credentials) while
     +    looking for help.
      
          Redact the URL unless the GIT_TRACE_REDACT variable is set to false. This
          mimics the redaction of other sensitive information in git, like the
          Authorization header in HTTP.
      
     +    Fix also capitalization of previous die() message (must start in
     +    lowercase).
     +
          Signed-off-by: Ivan Frade <ifrade@google.com>
      
       ## http-fetch.c ##
     @@ http-fetch.c: static void fetch_single_packfile(struct object_id *packfile_hash,
      -			    curl_errorstr);
      +			struct url_info url;
      +			char *nurl = url_normalize(preq->url, &url);
     -+			if (!git_env_bool("GIT_TRACE_REDACT", 1) || !nurl) {
     -+				die("Unable to get pack file %s\n%s", preq->url,
     ++			if (!nurl || !git_env_bool("GIT_TRACE_REDACT", 1)) {
     ++				die("unable to get pack file '%s'\n%s", preq->url,
      +				    curl_errorstr);
      +			} else {
     -+				char *schema = xstrndup(url.url, url.scheme_len);
     -+				char *host = xstrndup(&url.url[url.host_off], url.host_len);
     -+				die("failed to get '%s' url from '%s' "
     ++				die("failed to get '%.*s' url from '%.*s' "
      +				    "(full URL redacted due to GIT_TRACE_REDACT setting)\n%s",
     -+				    schema, host, curl_errorstr);
     ++				    (int)url.scheme_len, url.url,
     ++				    (int)url.host_len, &url.url[url.host_off], curl_errorstr);
      +			}
       		}
       	} else {

-- 
gitgitgadget

  parent reply	other threads:[~2021-10-28 22:51 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-08 16:03 [PATCH 0/2] fetch-pack: redact packfile urls in traces Ivan Frade via GitGitGadget
2021-10-08 16:03 ` [PATCH 1/2] " Ivan Frade via GitGitGadget
2021-10-08 19:36   ` Ævar Arnfjörð Bjarmason
2021-10-08 23:15     ` Ivan Frade
2021-10-08 16:03 ` [PATCH 2/2] Documentation: packfile-uri hash can be longer than 40 hex chars Ivan Frade via GitGitGadget
2021-10-08 19:43   ` Ævar Arnfjörð Bjarmason
2021-10-09  2:20 ` [PATCH v2] fetch-pack: redact packfile urls in traces Ivan Frade via GitGitGadget
2021-10-11 20:39   ` Junio C Hamano
2021-10-26 19:32     ` Ivan Frade
2021-10-19 22:57   ` [PATCH v3] " Ivan Frade via GitGitGadget
2021-10-20 11:41     ` Ævar Arnfjörð Bjarmason
2021-10-26 22:49     ` [PATCH v4 0/2] " Ivan Frade via GitGitGadget
2021-10-26 22:49       ` [PATCH v4 1/2] " Ivan Frade via GitGitGadget
2021-10-28  1:01         ` Junio C Hamano
2021-10-28 22:15           ` Ivan Frade
2021-10-28 22:46             ` Junio C Hamano
2021-10-26 22:49       ` [PATCH v4 2/2] http-fetch: redact url on die() message Ivan Frade via GitGitGadget
2021-10-28 16:39         ` Ævar Arnfjörð Bjarmason
2021-10-28 17:25           ` Eric Sunshine
2021-10-28 22:44             ` Ivan Frade
2021-10-28 22:41           ` Ivan Frade
2021-10-29 23:18           ` Junio C Hamano
2021-11-09  1:54             ` Ævar Arnfjörð Bjarmason
2021-10-28 22:51       ` Ivan Frade via GitGitGadget [this message]
2021-10-28 22:51         ` [PATCH v5 1/2] fetch-pack: redact packfile urls in traces Ivan Frade via GitGitGadget
2021-10-28 23:21           ` Junio C Hamano
2021-10-29 18:42             ` Ivan Frade
2021-10-29 19:59               ` Junio C Hamano
2021-11-08 22:43                 ` Jonathan Tan
2021-10-28 22:51         ` [PATCH v5 2/2] http-fetch: redact url on die() message Ivan Frade via GitGitGadget
2021-10-29 18:42         ` [PATCH v6 0/2] fetch-pack: redact packfile urls in traces Ivan Frade via GitGitGadget
2021-10-29 18:42           ` [PATCH v6 1/2] " Ivan Frade via GitGitGadget
2021-11-08 23:01             ` Jonathan Tan
2021-11-09  1:36               ` Ævar Arnfjörð Bjarmason
2021-11-10 23:44                 ` Ivan Frade
2021-11-11  0:01                   ` Ævar Arnfjörð Bjarmason
2021-11-10 21:18               ` Ivan Frade
2021-10-29 18:42           ` [PATCH v6 2/2] http-fetch: redact url on die() message Ivan Frade via GitGitGadget
2021-11-08 23:06             ` Jonathan Tan
2021-11-10 23:51           ` [PATCH v7 0/2] fetch-pack: redact packfile urls in traces Ivan Frade via GitGitGadget
2021-11-10 23:51             ` [PATCH v7 1/2] " Ivan Frade via GitGitGadget
2021-11-10 23:51             ` [PATCH v7 2/2] http-fetch: redact url on die() message Ivan Frade via GitGitGadget
2021-11-12  4:43             ` [PATCH v7 0/2] fetch-pack: redact packfile urls in traces Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.1052.v5.git.1635461500.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=ifrade@google.com \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.