git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Martin Ågren" <martin.agren@gmail.com>
To: git@vger.kernel.org
Cc: "Patrick Steinhardt" <ps@pks.im>, "René Scharfe" <l.s.r@web.de>
Subject: [PATCH 6/8] pretty: refactor parsing of line-wrapping "%w" placeholder
Date: Wed, 19 Mar 2025 08:23:39 +0100	[thread overview]
Message-ID: <465c91155eb30197b5eac00d294dc6e7ea2dd310.1742367347.git.martin.agren@gmail.com> (raw)
In-Reply-To: <cover.1742367347.git.martin.agren@gmail.com>

Our parsing of a "%w" placeholder is quite a big chunk of code in
the middle of our switch for handling a few different placeholders. We
parse into three different variables, then use them to compare to and
update existing values in the big `struct format_commit_context`.

Pull out a helper function for parsing such a "%w" placeholder. Define a
struct for collecting the three variables.

Unlike recent commits, parsing and subsequent use are already a bit more
separated in the sense that we don't parse directly into the big context
struct. Thus, unlike the preceding commits, this does not fix any bugs
that I'm aware of. There's still value in separating parsing and usage
more clearly and simplifying `format_commit_one()`.

Note that we use two different types for these values, `unsigned long`
when parsing, `size_t` when eventually applying. Let's go for `size_t`
in our struct. I don't know if there are platforms where assigning an
`unsigned long` to a `size_t` could truncate the value, but since we
already verify the values to be at most 16 KiB, we should be able to fit
them into any sane `size_t`s.

Signed-off-by: Martin Ågren <martin.agren@gmail.com>
---
 pretty.c | 120 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 76 insertions(+), 44 deletions(-)

diff --git a/pretty.c b/pretty.c
index f53e77ed86..c44ff87481 100644
--- a/pretty.c
+++ b/pretty.c
@@ -893,6 +893,10 @@ struct padding_args {
 	int padding;
 };
 
+struct rewrap_args {
+	size_t width, indent1, indent2;
+};
+
 struct format_commit_context {
 	struct repository *repository;
 	const struct commit *commit;
@@ -902,7 +906,7 @@ struct format_commit_context {
 	struct signature_check signature_check;
 	const char *message;
 	char *commit_encoding;
-	size_t width, indent1, indent2;
+	struct rewrap_args rewrap;
 	int auto_color;
 	struct padding_args pad;
 
@@ -1034,18 +1038,21 @@ static void strbuf_wrap(struct strbuf *sb, size_t pos,
 
 static void rewrap_message_tail(struct strbuf *sb,
 				struct format_commit_context *c,
-				size_t new_width, size_t new_indent1,
-				size_t new_indent2)
+				const struct rewrap_args *new_rewrap)
 {
-	if (c->width == new_width && c->indent1 == new_indent1 &&
-	    c->indent2 == new_indent2)
+	const struct rewrap_args *old_rewrap = &c->rewrap;
+
+	if (old_rewrap->width == new_rewrap->width &&
+	    old_rewrap->indent1 == new_rewrap->indent1 &&
+	    old_rewrap->indent2 == new_rewrap->indent2)
 		return;
+
 	if (c->wrap_start < sb->len)
-		strbuf_wrap(sb, c->wrap_start, c->width, c->indent1, c->indent2);
+		strbuf_wrap(sb, c->wrap_start, old_rewrap->width,
+			    old_rewrap->indent1, old_rewrap->indent2);
+
 	c->wrap_start = sb->len;
-	c->width = new_width;
-	c->indent1 = new_indent1;
-	c->indent2 = new_indent2;
+	c->rewrap = *new_rewrap;
 }
 
 static int format_reflog_person(struct strbuf *sb,
@@ -1443,6 +1450,57 @@ static void free_decoration_options(const struct decoration_options *opts)
 	free(opts->tag);
 }
 
+static size_t parse_rewrap(const char *placeholder, struct rewrap_args *rewrap)
+{
+	unsigned long width = 0, indent1 = 0, indent2 = 0;
+	char *next;
+	const char *start;
+	const char *end;
+
+	memset(rewrap, 0, sizeof(*rewrap));
+
+	if (placeholder[1] != '(')
+		return 0;
+
+	start = placeholder + 2;
+	end = strchr(start, ')');
+
+	if (!end)
+		return 0;
+	if (end > start) {
+		width = strtoul(start, &next, 10);
+		if (*next == ',') {
+			indent1 = strtoul(next + 1, &next, 10);
+			if (*next == ',') {
+				indent2 = strtoul(next + 1,
+						 &next, 10);
+			}
+		}
+		if (*next != ')')
+			return 0;
+	}
+
+	/*
+	 * We need to limit the format here as it allows the
+	 * user to prepend arbitrarily many bytes to the buffer
+	 * when rewrapping.
+	 */
+	if (width > FORMATTING_LIMIT ||
+	    indent1 > FORMATTING_LIMIT ||
+	    indent2 > FORMATTING_LIMIT)
+		return 0;
+
+	/*
+	 * These values are small enough to fit in any
+	 * real-world size_t.
+	 */
+	rewrap->width = width;
+	rewrap->indent1 = indent1;
+	rewrap->indent2 = indent2;
+
+	return end - placeholder + 1;
+}
+
 static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
 				const char *placeholder,
 				struct format_commit_context *c)
@@ -1478,40 +1536,13 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
 			return ret;
 		}
 	case 'w':
-		if (placeholder[1] == '(') {
-			unsigned long width = 0, indent1 = 0, indent2 = 0;
-			char *next;
-			const char *start = placeholder + 2;
-			const char *end = strchr(start, ')');
-			if (!end)
-				return 0;
-			if (end > start) {
-				width = strtoul(start, &next, 10);
-				if (*next == ',') {
-					indent1 = strtoul(next + 1, &next, 10);
-					if (*next == ',') {
-						indent2 = strtoul(next + 1,
-								 &next, 10);
-					}
-				}
-				if (*next != ')')
-					return 0;
-			}
-
-			/*
-			 * We need to limit the format here as it allows the
-			 * user to prepend arbitrarily many bytes to the buffer
-			 * when rewrapping.
-			 */
-			if (width > FORMATTING_LIMIT ||
-			    indent1 > FORMATTING_LIMIT ||
-			    indent2 > FORMATTING_LIMIT)
-				return 0;
-			rewrap_message_tail(sb, c, width, indent1, indent2);
-			return end - placeholder + 1;
-		} else
-			return 0;
-
+		{
+			struct rewrap_args rewrap;
+			res = parse_rewrap(placeholder, &rewrap);
+			if (res)
+				rewrap_message_tail(sb, c, &rewrap);
+			return res;
+		}
 	case '<':
 	case '>':
 		return parse_padding_placeholder(placeholder, &c->pad);
@@ -2005,6 +2036,7 @@ void repo_format_commit_message(struct repository *r,
 	};
 	const char *output_enc = pretty_ctx->output_encoding;
 	const char *utf8 = "UTF-8";
+	const struct rewrap_args rewrap_reset = { 0 };
 
 	while (strbuf_expand_step(sb, &format)) {
 		size_t len;
@@ -2016,7 +2048,7 @@ void repo_format_commit_message(struct repository *r,
 		else
 			strbuf_addch(sb, '%');
 	}
-	rewrap_message_tail(sb, &context, 0, 0, 0);
+	rewrap_message_tail(sb, &context, &rewrap_reset);
 
 	/*
 	 * Convert output to an actual output encoding; note that
-- 
2.49.0.472.ge94155a9ec


  parent reply	other threads:[~2025-03-19  7:25 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-19  7:23 [PATCH 0/8] pretty: minor bugfixing, some refactorings Martin Ågren
2025-03-19  7:23 ` [PATCH 1/8] pretty: tighten function signature to not take `void *` Martin Ågren
2025-03-20  9:18   ` Patrick Steinhardt
2025-03-19  7:23 ` [PATCH 2/8] pretty: simplify if-else to reduce code duplication Martin Ågren
2025-03-20  9:17   ` Patrick Steinhardt
2025-03-20 16:10     ` Martin Ågren
2025-03-24  3:50   ` Jeff King
2025-03-19  7:23 ` [PATCH 3/8] pretty: collect padding-related fields in separate struct Martin Ågren
2025-03-19  7:23 ` [PATCH 4/8] pretty: fix parsing of half-valid "%<" and "%>" placeholders Martin Ågren
2025-03-20  9:18   ` Patrick Steinhardt
2025-03-20 16:11     ` Martin Ågren
2025-03-24 10:10       ` Patrick Steinhardt
2025-03-19  7:23 ` [PATCH 5/8] pretty: after padding, reset padding info Martin Ågren
2025-03-20  9:18   ` Patrick Steinhardt
2025-03-20 16:11     ` Martin Ågren
2025-03-19  7:23 ` Martin Ågren [this message]
2025-03-20  9:18   ` [PATCH 6/8] pretty: refactor parsing of line-wrapping "%w" placeholder Patrick Steinhardt
2025-03-20 16:11     ` Martin Ågren
2025-03-19  7:23 ` [PATCH 7/8] pretty: refactor parsing of magic Martin Ågren
2025-03-20  9:18   ` Patrick Steinhardt
2025-03-20 16:12     ` Martin Ågren
2025-03-19  7:23 ` [PATCH 8/8] pretty: refactor parsing of decoration options Martin Ågren

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=465c91155eb30197b5eac00d294dc6e7ea2dd310.1742367347.git.martin.agren@gmail.com \
    --to=martin.agren@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=l.s.r@web.de \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).