* [PATCH] fsck: use starts_with() in fsck_commit()
@ 2025-10-31 22:00 keita
2025-11-01 14:59 ` Christian Couder
0 siblings, 1 reply; 3+ messages in thread
From: keita @ 2025-10-31 22:00 UTC (permalink / raw)
To: git@vger.kernel.org; +Cc: outreachy@gitgitgadget.github.io
From 30136adebaffb97edacae2c58c4ea491e39e3f5b Mon Sep 17 00:00:00 2001From: Songiso Cooper Lyambai <rudykeita@proton.me>
Date: Fri, 31 Oct 2025 23:45:23 +0200
Subject: [PATCH] fsck: use starts_with() in fsck_commit()
Replace manual buffer checks with starts_with() for safety and clarity.
This avoids buffer overreads and follows Git's idiomatic style used
Signed-off-by: Songiso Cooper Lyambai <rudykeita@proton.me>
---
fsck.c | 124 +++++++++++++++++++++++++++++++--------------------------
1 file changed, 67 insertions(+), 57 deletions(-)
diff --git a/fsck.c b/fsck.c
index 341e100d24..7172c4ff1c 100644
--- a/fsck.c
+++ b/fsck.c
@@ -921,67 +921,77 @@ static int fsck_ident(const char **ident,
}
static int fsck_commit(const struct object_id *oid,
- const char *buffer, unsigned long size,
- struct fsck_options *options)
+ const char *buffer, unsigned long size,
+ struct fsck_options *options)
{
- struct object_id tree_oid, parent_oid;
- unsigned author_count;
- int err;
- const char *buffer_begin = buffer;
- const char *buffer_end = buffer + size;
- const char *p;
+ struct object_id tree_oid, parent_oid;
+ unsigned author_count = 0;
+ int err = 0;
+ const char *buffer_end = buffer + size;
+ const char *p;
- /*
- * We _must_ stop parsing immediately if this reports failure, as the
- * memory safety of the rest of the function depends on it. See the
- * comment above the definition of verify_headers() for more details.
- */
- if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
- return -1;
- if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
- return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
- if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
- err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
- if (err)
- return err;
- }
- buffer = p + 1;
- while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
- if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
- err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
- if (err)
- return err;
- }
- buffer = p + 1;
- }
- author_count = 0;
- while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
- author_count++;
- err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
- if (err)
- return err;
- }
- if (author_count < 1)
- err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
- else if (author_count > 1)
- err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
- if (err)
- return err;
- if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
- return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
- err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
- if (err)
- return err;
- if (memchr(buffer_begin, '\0', size)) {
- err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
- "NUL byte in the commit object body");
- if (err)
- return err;
- }
- return 0;
+ /*
+ * We _must_ stop parsing immediately if this reports failure, as the
+ * memory safety of the rest of the function depends on it. See the
+ * comment above the definition of verify_headers() for more details.
+ */
+
+ if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
+ return -1;
+
+
+ if (!skip_prefix(buffer, "tree ", &buffer))
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE,
+ "invalid format - expected 'tree' line");
+ if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1,
+ "invalid 'tree' line format - bad sha1");
+ }
+ buffer = p + 1;
+
+ while (starts_with(buffer, "parent ")) {
+ if (!skip_prefix(buffer, "parent ", &buffer) ||
+ parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1,
+ "invalid 'parent' line format - bad sha1");
+ }
+ buffer = p + 1;
+ }
+
+ while (starts_with(buffer, "author ")) {
+ author_count++;
+ if (!skip_prefix(buffer, "author ", &buffer))
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR,
+ "invalid format - expected 'author' line");
+ if ((err = fsck_ident(&buffer, oid, OBJ_COMMIT, options)))
+ return err;
+ }
+
+ if (author_count < 1)
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR,
+ "invalid format - expected 'author' line");
+ if (author_count > 1)
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS,
+ "invalid format - multiple 'author' lines");
+
+ if (!starts_with(buffer, "committer "))
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER,
+ "invalid format - expected 'committer' line");
+
+ if (!skip_prefix(buffer, "committer ", &buffer))
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER,
+ "invalid format - expected 'committer' line");
+
+ if ((err = fsck_ident(&buffer, oid, OBJ_COMMIT, options)))
+ return err;
+
+ if (memchr(buffer, '\0', buffer_end - buffer))
+ return report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
+ "NUL byte in the commit object body");
+
+ return 0;
}
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] fsck: use starts_with() in fsck_commit()
2025-10-31 22:00 [PATCH] fsck: use starts_with() in fsck_commit() keita
@ 2025-11-01 14:59 ` Christian Couder
2025-11-02 3:58 ` Junio C Hamano
0 siblings, 1 reply; 3+ messages in thread
From: Christian Couder @ 2025-11-01 14:59 UTC (permalink / raw)
To: keita; +Cc: git@vger.kernel.org, outreachy@gitgitgadget.github.io
Hi,
On Fri, Oct 31, 2025 at 11:01 PM keita <rudykeita@proton.me> wrote:
> From 30136adebaffb97edacae2c58c4ea491e39e3f5b Mon Sep 17 00:00:00 2001From: Songiso Cooper Lyambai <rudykeita@proton.me>
> Date: Fri, 31 Oct 2025 23:45:23 +0200
> Subject: [PATCH] fsck: use starts_with() in fsck_commit()
If this is related to Outreachy, it would be better to put
"[Outreachy]" at the start of the subject.
> Replace manual buffer checks with starts_with() for safety and clarity.
>
> This avoids buffer overreads and follows Git's idiomatic style used
It looks like the above sentence is not finished. Maybe s/ used/./ ?
> Signed-off-by: Songiso Cooper Lyambai <rudykeita@proton.me>
> ---
> fsck.c | 124 +++++++++++++++++++++++++++++++--------------------------
> 1 file changed, 67 insertions(+), 57 deletions(-)
A lot of lines seem to be changed for a patch that wants to use
starts_with() in fsck_commit(). Let's see below.
> diff --git a/fsck.c b/fsck.c
> index 341e100d24..7172c4ff1c 100644
> --- a/fsck.c
> +++ b/fsck.c
> @@ -921,67 +921,77 @@ static int fsck_ident(const char **ident,
> }
>
> static int fsck_commit(const struct object_id *oid,
> - const char *buffer, unsigned long size,
> - struct fsck_options *options)
> + const char *buffer, unsigned long size,
> + struct fsck_options *options)
If a patch does other things than it's main goal, it should be
mentioned in the commit message that it's doing those other things
"while at it".
Anyway when I look at fsck_commit() in "fsck.c" on 'master', it seems
to me that the function is properly indented. So I suspect that your
patch changes its indentation for no good reason.
> {
> - struct object_id tree_oid, parent_oid;
> - unsigned author_count;
> - int err;
> - const char *buffer_begin = buffer;
> - const char *buffer_end = buffer + size;
> - const char *p;
> + struct object_id tree_oid, parent_oid;
> + unsigned author_count = 0;
> + int err = 0;
> + const char *buffer_end = buffer + size;
> + const char *p;
Here also I suspect that the indentation changes are not necessary.
They are also making it more difficult to spot actual changes like
'buffer_begin' being removed and 'author_count' and 'err' being
initialized to 0.
Sorry I am stopping my review here as there seems to be too many
indentation changes that are masking the actual changes.
Please make sure you send patches that don't change the indentation
for no good reason.
Thanks.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] fsck: use starts_with() in fsck_commit()
2025-11-01 14:59 ` Christian Couder
@ 2025-11-02 3:58 ` Junio C Hamano
0 siblings, 0 replies; 3+ messages in thread
From: Junio C Hamano @ 2025-11-02 3:58 UTC (permalink / raw)
To: Christian Couder
Cc: keita, git@vger.kernel.org, outreachy@gitgitgadget.github.io
Christian Couder <christian.couder@gmail.com> writes:
> Hi,
>
> On Fri, Oct 31, 2025 at 11:01 PM keita <rudykeita@proton.me> wrote:
>
>> From 30136adebaffb97edacae2c58c4ea491e39e3f5b Mon Sep 17 00:00:00 2001From: Songiso Cooper Lyambai <rudykeita@proton.me>
>> Date: Fri, 31 Oct 2025 23:45:23 +0200
>> Subject: [PATCH] fsck: use starts_with() in fsck_commit()
>
> If this is related to Outreachy, it would be better to put
> "[Outreachy]" at the start of the subject.
Plus there is a lot more important thing to be said for this part fo
the lines that you forgot to point out. They should *NOT* be part
of the e-mail body. The Subject: header of the e-mail seems to be
set to the same as this line, so the sender only needs to delete all
these four lines from the e-mail body and correct the subject.
>> {
>> - struct object_id tree_oid, parent_oid;
>> - unsigned author_count;
>> - int err;
>> - const char *buffer_begin = buffer;
>> - const char *buffer_end = buffer + size;
>> - const char *p;
>> + struct object_id tree_oid, parent_oid;
>> + unsigned author_count = 0;
>> + int err = 0;
>> + const char *buffer_end = buffer + size;
>> + const char *p;
>
> Here also I suspect that the indentation changes are not necessary.
> They are also making it ...
Does the preimage even match our code? There is no C code in our
codebase that uses a single space indent, so I would not expect
these 7 lines of preimage to be found in fsck.c or anywhere in our
codebase.
Hence, another more important thing to point out is that the patch
would not apply. A suggestion to the author (and other aspiring
folks who want to become Git developers) is to
- Send the patch you are planning to submit, but not to the list
but only to yourself.
- Subscribe to the list and then observe the traffic for a day or
two to find patch e-mails from others. Find other patch e-mails
from each of these people at https://lore.kernel.org/git and pick
the author who is highly regarded.
- Compare the e-mailed patch you received from yourself, and the
one you received from the list written by that highly regarded
author you picked. I am reasonably sure that they do not have
the e-mail headers repeated.
- In your clone of Git, check out an appropriate target; if you are
fixing something, you want to use a tag that corresponds to a
released version, if you are proposing a new feature, you want to
build on the latest released version. Use "git am" to apply the
e-mailed patch you received from yourself.
We may want to #leftoverbits add something like the above to the
MyFirstContribution document near the part that it teachs to send
patches.
> Please make sure you send patches that don't change the indentation
> for no good reason.
A good suggestion that applies much wider than the indentation.
Your patch should not do anything related to the theme of the patch,
whcih you explained in your proposed log message.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-11-02 3:58 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-31 22:00 [PATCH] fsck: use starts_with() in fsck_commit() keita
2025-11-01 14:59 ` Christian Couder
2025-11-02 3:58 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).