git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christian Couder <christian.couder@gmail.com>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>,
	Patrick Steinhardt <ps@pks.im>,
	Luke Shumaker <lukeshu@lukeshu.com>,
	Elijah Newren <newren@gmail.com>, Jeff King <peff@peff.net>,
	Johannes Schindelin <Johannes.Schindelin@gmx.de>,
	Taylor Blau <me@ttaylorr.com>,
	"brian m . carlson" <sandals@crustytoothpaste.net>,
	Eric Sunshine <sunshine@sunshineco.com>,
	Luke Shumaker <lukeshu@datawire.io>,
	Christian Couder <chriscool@tuxfamily.org>
Subject: [PATCH v5 5/6] fast-export: do not modify memory from get_commit_buffer
Date: Mon, 24 Feb 2025 15:27:43 +0100	[thread overview]
Message-ID: <20250224142744.279643-6-christian.couder@gmail.com> (raw)
In-Reply-To: <20250224142744.279643-1-christian.couder@gmail.com>

From: Luke Shumaker <lukeshu@datawire.io>

fast-export's helper function find_encoding() takes a `const char *`, but
modifies that memory despite the `const`.  Ultimately, this memory came
from get_commit_buffer(), and you're not supposed to modify the memory
that you get from get_commit_buffer().

So, get rid of find_encoding() in favor of commit.h:find_commit_header(),
which gives back a string length, rather than mutating the memory to
insert a '\0' terminator.

Because find_commit_header() detects the "\n\n" string that separates the
headers and the commit message, move the call to be above the
`message = strstr(..., "\n\n")` call.  This helps readability, and allows
the end of the `encoding` value, when one is found, to be used as the
starting point ("...") of that search, so that the same memory doesn't
need to be scanned twice.  Introduce a `commit_buffer_cursor` variable to
avoid writing an awkward `encoding ? encoding + encoding_len :
committer_end` expression.

Signed-off-by: Luke Shumaker <lukeshu@datawire.io>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 builtin/fast-export.c | 61 +++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 2de2adc30e..39d43c2a29 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -510,21 +510,6 @@ static void show_filemodify(struct diff_queue_struct *q,
 	}
 }
 
-static const char *find_encoding(const char *begin, const char *end)
-{
-	const char *needle = "\nencoding ";
-	char *bol, *eol;
-
-	bol = memmem(begin, end ? end - begin : strlen(begin),
-		     needle, strlen(needle));
-	if (!bol)
-		return NULL;
-	bol += strlen(needle);
-	eol = strchrnul(bol, '\n');
-	*eol = '\0';
-	return bol;
-}
-
 static char *anonymize_ref_component(void)
 {
 	static int counter;
@@ -630,9 +615,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 			  struct string_list *paths_of_changed_objects)
 {
 	int saved_output_format = rev->diffopt.output_format;
-	const char *commit_buffer;
+	const char *commit_buffer, *commit_buffer_cursor;
 	const char *author, *author_end, *committer, *committer_end;
-	const char *encoding, *message;
+	const char *encoding = NULL;
+	size_t encoding_len;
+	const char *message;
 	char *reencoded = NULL;
 	struct commit_list *p;
 	const char *refname;
@@ -641,21 +628,35 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 
 	parse_commit_or_die(commit);
-	commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
-	author = strstr(commit_buffer, "\nauthor ");
+	commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
+
+	author = strstr(commit_buffer_cursor, "\nauthor ");
 	if (!author)
 		die("could not find author in commit %s",
 		    oid_to_hex(&commit->object.oid));
 	author++;
-	author_end = strchrnul(author, '\n');
-	committer = strstr(author_end, "\ncommitter ");
+	commit_buffer_cursor = author_end = strchrnul(author, '\n');
+
+	committer = strstr(commit_buffer_cursor, "\ncommitter ");
 	if (!committer)
 		die("could not find committer in commit %s",
 		    oid_to_hex(&commit->object.oid));
 	committer++;
-	committer_end = strchrnul(committer, '\n');
-	message = strstr(committer_end, "\n\n");
-	encoding = find_encoding(committer_end, message);
+	commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
+
+	/*
+	 * find_commit_header() gets a `+ 1` because
+	 * commit_buffer_cursor points at the trailing "\n" at the end
+	 * of the previous line, but find_commit_header() wants a
+	 * pointer to the beginning of the next line.
+	 */
+	if (*commit_buffer_cursor == '\n') {
+		encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
+		if (encoding)
+			commit_buffer_cursor = encoding + encoding_len;
+	}
+
+	message = strstr(commit_buffer_cursor, "\n\n");
 	if (message)
 		message += 2;
 
@@ -694,16 +695,20 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	if (anonymize) {
 		reencoded = anonymize_commit_message();
 	} else if (encoding) {
+		char *buf;
 		switch (reencode_mode) {
 		case REENCODE_YES:
-			reencoded = reencode_string(message, "UTF-8", encoding);
+			buf = xstrfmt("%.*s", (int)encoding_len, encoding);
+			reencoded = reencode_string(message, "UTF-8", buf);
+			free(buf);
 			break;
 		case REENCODE_NO:
 			break;
 		case REENCODE_ABORT:
-			die("Encountered commit-specific encoding %s in commit "
+			die("Encountered commit-specific encoding %.*s in commit "
 			    "%s; use --reencode=[yes|no] to handle it",
-			    encoding, oid_to_hex(&commit->object.oid));
+			    (int)encoding_len, encoding,
+			    oid_to_hex(&commit->object.oid));
 		}
 	}
 	if (!commit->parents)
@@ -715,7 +720,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	       (int)(author_end - author), author,
 	       (int)(committer_end - committer), committer);
 	if (!reencoded && encoding)
-		printf("encoding %s\n", encoding);
+		printf("encoding %.*s\n", (int)encoding_len, encoding);
 	printf("data %u\n%s",
 	       (unsigned)(reencoded
 			  ? strlen(reencoded) : message
-- 
2.48.1.401.g48e0d4203c


  parent reply	other threads:[~2025-02-24 14:28 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-22  0:27 [PATCH v2 0/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-22  0:27 ` [PATCH v2 1/3] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-22  0:27 ` [PATCH v2 2/3] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-22  3:59   ` Eric Sunshine
2021-04-22  4:43     ` Luke Shumaker
2021-04-22  4:50       ` Luke Shumaker
2021-04-22  0:27 ` [PATCH v2 3/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-23 16:41 ` [PATCH v3 0/3] " Luke Shumaker
2021-04-23 16:41   ` [PATCH v3 1/3] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-23 16:41   ` [PATCH v3 2/3] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-28  3:29     ` Junio C Hamano
2021-04-29 19:02       ` Luke Shumaker
2021-04-30  0:03         ` Junio C Hamano
2021-04-23 16:41   ` [PATCH v3 3/3] fast-export, fast-import: implement signed-commits Luke Shumaker
2021-04-28  4:02     ` Junio C Hamano
2021-04-29 20:06       ` Luke Shumaker
2021-04-29 22:38         ` Elijah Newren
2021-04-29 23:42           ` Junio C Hamano
2021-04-30  2:23             ` Elijah Newren
2021-04-30  3:20               ` Junio C Hamano
2021-04-30 17:07             ` Luke Shumaker
2021-04-30 19:34       ` Luke Shumaker
2021-04-30 19:59         ` Elijah Newren
2021-04-30 22:21           ` Luke Shumaker
2021-04-30 23:25   ` [PATCH v4 0/5] fast-export, fast-import: add support for signed-commits Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 1/5] git-fast-import.txt: add missing LF in the BNF Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 2/5] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 3/5] git-fast-export.txt: clarify why 'verbatim' may not be a good idea Luke Shumaker
2021-04-30 23:25     ` [PATCH v4 4/5] fast-export: do not modify memory from get_commit_buffer Luke Shumaker
2021-05-03  4:41       ` Junio C Hamano
2021-04-30 23:25     ` [PATCH v4 5/5] fast-export, fast-import: add support for signed-commits Luke Shumaker
2021-05-03  5:09       ` Junio C Hamano
2025-02-24 14:27     ` [PATCH v5 0/6] " Christian Couder
2025-02-24 14:27       ` [PATCH v5 1/6] git-fast-import.adoc: add missing LF in the BNF Christian Couder
2025-02-24 14:27       ` [PATCH v5 2/6] fast-export: fix missing whitespace after switch Christian Couder
2025-02-24 14:27       ` [PATCH v5 3/6] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Christian Couder
2025-02-24 14:27       ` [PATCH v5 4/6] git-fast-export.txt: clarify why 'verbatim' may not be a good idea Christian Couder
2025-02-24 19:26         ` Elijah Newren
2025-03-10 15:58           ` Christian Couder
2025-02-24 14:27       ` Christian Couder [this message]
2025-02-24 14:27       ` [PATCH v5 6/6] fast-export, fast-import: add support for signed-commits Christian Couder
2025-02-25  7:35         ` Elijah Newren
2025-02-25 16:25           ` Junio C Hamano
2025-03-10 15:58           ` Christian Couder
2025-02-24 17:01       ` [PATCH v5 0/6] " Junio C Hamano
2025-02-25  7:35         ` Elijah Newren
2025-02-25  7:51           ` Patrick Steinhardt
2025-02-25 16:48             ` Elijah Newren
2025-02-25 16:56               ` Junio C Hamano
2025-03-10 15:59                 ` Christian Couder
2025-02-25 14:53       ` Phillip Wood
2025-03-10 15:59         ` Christian Couder
2025-03-10 15:57       ` [PATCH v6 " Christian Couder
2025-03-10 15:57         ` [PATCH v6 1/6] git-fast-import.adoc: add missing LF in the BNF Christian Couder
2025-03-10 15:57         ` [PATCH v6 2/6] fast-export: fix missing whitespace after switch Christian Couder
2025-03-10 15:57         ` [PATCH v6 3/6] fast-export: rename --signed-tags='warn' to 'warn-verbatim' Christian Couder
2025-03-10 15:57         ` [PATCH v6 4/6] git-fast-export.adoc: clarify why 'verbatim' may not be a good idea Christian Couder
2025-03-10 15:57         ` [PATCH v6 5/6] fast-export: do not modify memory from get_commit_buffer Christian Couder
2025-03-10 15:57         ` [PATCH v6 6/6] fast-export, fast-import: add support for signed-commits Christian Couder
2025-03-10 22:36         ` [PATCH v6 0/6] " Elijah Newren

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250224142744.279643-6-christian.couder@gmail.com \
    --to=christian.couder@gmail.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=chriscool@tuxfamily.org \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=lukeshu@datawire.io \
    --cc=lukeshu@lukeshu.com \
    --cc=me@ttaylorr.com \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    --cc=ps@pks.im \
    --cc=sandals@crustytoothpaste.net \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).