* [PATCH] parse_commit_buffer: don't parse invalid commits
@ 2008-01-14 21:20 Martin Koegler
2008-01-15 7:32 ` Johannes Sixt
0 siblings, 1 reply; 8+ messages in thread
From: Martin Koegler @ 2008-01-14 21:20 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Martin Koegler
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
commit.c | 27 ++++++++++++++++++++-------
1 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/commit.c b/commit.c
index f074811..8b8fb04 100644
--- a/commit.c
+++ b/commit.c
@@ -48,19 +48,32 @@ struct commit *lookup_commit(const unsigned char *sha1)
return check_commit(obj, sha1, 0);
}
-static unsigned long parse_commit_date(const char *buf)
+static unsigned long parse_commit_date(const char *buf, const char *tail)
{
unsigned long date;
+ const char *dateptr;
+ if (buf + 6 >= tail)
+ return 0;
if (memcmp(buf, "author", 6))
return 0;
- while (*buf++ != '\n')
+ while (buf < tail && *buf++ != '\n')
/* nada */;
+ if (buf + 9 >= tail)
+ return 0;
if (memcmp(buf, "committer", 9))
return 0;
- while (*buf++ != '>')
+ while (buf < tail && *buf++ != '>')
/* nada */;
- date = strtoul(buf, NULL, 10);
+ if (buf >= tail)
+ return 0;
+ dateptr = buf;
+ while (buf < tail && *buf++ != '\n')
+ /* nada */;
+ if (buf >= tail)
+ return 0;
+ /* dateptr < buf && buf[-1] == '\n', so strtoul will stop at buf-1 */
+ date = strtoul(dateptr, NULL, 10);
if (date == ULONG_MAX)
date = 0;
return date;
@@ -236,9 +249,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
return 0;
item->object.parsed = 1;
tail += size;
- if (tail <= bufptr + 5 || memcmp(bufptr, "tree ", 5))
+ if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
- if (tail <= bufptr + 45 || get_sha1_hex(bufptr + 5, parent) < 0)
+ if (get_sha1_hex(bufptr + 5, parent) < 0)
return error("bad tree pointer in commit %s",
sha1_to_hex(item->object.sha1));
item->tree = lookup_tree(parent);
@@ -275,7 +288,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
n_refs++;
}
}
- item->date = parse_commit_date(bufptr);
+ item->date = parse_commit_date(bufptr, tail);
if (track_object_refs) {
unsigned i = 0;
--
gitgui.0.9.1.g61374
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH] parse_commit_buffer: don't parse invalid commits
@ 2008-01-19 17:35 Martin Koegler
2008-01-19 19:52 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Martin Koegler @ 2008-01-19 17:35 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Martin Koegler
* check, that the tree line ends with \n
* prevent parse_commit_date from reading beyond the buffer,
- if author line does not end with \n
- if committer line does not contain >
* verify in parse_commit_date, that the commiter line ends with \n
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
commit.c | 27 ++++++++++++++++++++-------
1 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/commit.c b/commit.c
index f074811..8b8fb04 100644
--- a/commit.c
+++ b/commit.c
@@ -48,19 +48,32 @@ struct commit *lookup_commit(const unsigned char *sha1)
return check_commit(obj, sha1, 0);
}
-static unsigned long parse_commit_date(const char *buf)
+static unsigned long parse_commit_date(const char *buf, const char *tail)
{
unsigned long date;
+ const char *dateptr;
+ if (buf + 6 >= tail)
+ return 0;
if (memcmp(buf, "author", 6))
return 0;
- while (*buf++ != '\n')
+ while (buf < tail && *buf++ != '\n')
/* nada */;
+ if (buf + 9 >= tail)
+ return 0;
if (memcmp(buf, "committer", 9))
return 0;
- while (*buf++ != '>')
+ while (buf < tail && *buf++ != '>')
/* nada */;
- date = strtoul(buf, NULL, 10);
+ if (buf >= tail)
+ return 0;
+ dateptr = buf;
+ while (buf < tail && *buf++ != '\n')
+ /* nada */;
+ if (buf >= tail)
+ return 0;
+ /* dateptr < buf && buf[-1] == '\n', so strtoul will stop at buf-1 */
+ date = strtoul(dateptr, NULL, 10);
if (date == ULONG_MAX)
date = 0;
return date;
@@ -236,9 +249,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
return 0;
item->object.parsed = 1;
tail += size;
- if (tail <= bufptr + 5 || memcmp(bufptr, "tree ", 5))
+ if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
- if (tail <= bufptr + 45 || get_sha1_hex(bufptr + 5, parent) < 0)
+ if (get_sha1_hex(bufptr + 5, parent) < 0)
return error("bad tree pointer in commit %s",
sha1_to_hex(item->object.sha1));
item->tree = lookup_tree(parent);
@@ -275,7 +288,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
n_refs++;
}
}
- item->date = parse_commit_date(bufptr);
+ item->date = parse_commit_date(bufptr, tail);
if (track_object_refs) {
unsigned i = 0;
--
gitgui.0.9.1.g39b9
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-19 17:35 Martin Koegler
@ 2008-01-19 19:52 ` Junio C Hamano
2008-01-20 16:11 ` Martin Koegler
0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2008-01-19 19:52 UTC (permalink / raw)
To: Martin Koegler; +Cc: git
Martin Koegler <mkoegler@auto.tuwien.ac.at> writes:
> * check, that the tree line ends with \n
Ok.
> * prevent parse_commit_date from reading beyond the buffer,
> - if author line does not end with \n
> - if committer line does not contain >
"even if" would be nicer, I think.
> * verify in parse_commit_date, that the commiter line ends with \n
Perhaps an additional note to leave for people who will later be
tempted to touch this part of the code is needed; otherwise the
time spent on the thread goes wasted.
* keep the existing behaviour to return a commit object even
when non-structural fields such as committer and author are
malformed, so that tools that need to look at commits to
clean up a history with such broken commits can still get at
the structural data (i.e. the parents chain and the tree
object).
No need to resend if you allow me to amend the patch along the
above lines (please just say so), or you could resend with
improvements on your own.
Thanks.
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] parse_tag_buffer: don't parse invalid tags
@ 2008-01-06 19:03 Martin Koegler
2008-01-06 19:03 ` [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
0 siblings, 1 reply; 8+ messages in thread
From: Martin Koegler @ 2008-01-06 19:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Martin Koegler
The current tag parsing code can access memory outside the tag buffer,
if \n are missing. This patch prevent this behaviour.
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
tag.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/tag.c b/tag.c
index f62bcdd..fa22ae6 100644
--- a/tag.c
+++ b/tag.c
@@ -39,6 +39,7 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
unsigned char sha1[20];
const char *type_line, *tag_line, *sig_line;
char type[20];
+ const char* start = data;
if (item->object.parsed)
return 0;
@@ -53,11 +54,11 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
if (memcmp("\ntype ", type_line-1, 6))
return -1;
- tag_line = strchr(type_line, '\n');
+ tag_line = memchr(type_line, '\n', size - (type_line - start));
if (!tag_line || memcmp("tag ", ++tag_line, 4))
return -1;
- sig_line = strchr(tag_line, '\n');
+ sig_line = memchr(tag_line, '\n', size - (tag_line - start));
if (!sig_line)
return -1;
sig_line++;
--
1.4.4.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-06 19:03 [PATCH] parse_tag_buffer: don't parse invalid tags Martin Koegler
@ 2008-01-06 19:03 ` Martin Koegler
2008-01-06 22:00 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Martin Koegler @ 2008-01-06 19:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Martin Koegler
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
commit.c | 28 +++++++++++++++++++++-------
1 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/commit.c b/commit.c
index f074811..ffa0894 100644
--- a/commit.c
+++ b/commit.c
@@ -48,19 +48,33 @@ struct commit *lookup_commit(const unsigned char *sha1)
return check_commit(obj, sha1, 0);
}
-static unsigned long parse_commit_date(const char *buf)
+static unsigned long parse_commit_date(const char *buf, const char* tail)
{
unsigned long date;
+ char datebuf[20];
+ unsigned long len;
+ if (buf + 6 >= tail)
+ return 0;
if (memcmp(buf, "author", 6))
return 0;
- while (*buf++ != '\n')
+ while (buf < tail && *buf++ != '\n')
/* nada */;
+ if (buf + 9 >= tail)
+ return 0;
if (memcmp(buf, "committer", 9))
return 0;
- while (*buf++ != '>')
+ while (buf < tail && *buf++ != '>')
/* nada */;
- date = strtoul(buf, NULL, 10);
+ if (buf >= tail)
+ return 0;
+
+ len = tail - buf;
+ if (len > sizeof(datebuf) - 1)
+ len = sizeof(datebuf) - 1;
+ memcpy(datebuf, buf, len);
+ datebuf[len] = 0;
+ date = strtoul(datebuf, NULL, 10);
if (date == ULONG_MAX)
date = 0;
return date;
@@ -236,9 +250,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
return 0;
item->object.parsed = 1;
tail += size;
- if (tail <= bufptr + 5 || memcmp(bufptr, "tree ", 5))
+ if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
- if (tail <= bufptr + 45 || get_sha1_hex(bufptr + 5, parent) < 0)
+ if (get_sha1_hex(bufptr + 5, parent) < 0)
return error("bad tree pointer in commit %s",
sha1_to_hex(item->object.sha1));
item->tree = lookup_tree(parent);
@@ -275,7 +289,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
n_refs++;
}
}
- item->date = parse_commit_date(bufptr);
+ item->date = parse_commit_date(bufptr, tail);
if (track_object_refs) {
unsigned i = 0;
--
1.4.4.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-06 19:03 ` [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
@ 2008-01-06 22:00 ` Junio C Hamano
2008-01-07 7:40 ` Martin Koegler
0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2008-01-06 22:00 UTC (permalink / raw)
To: Martin Koegler; +Cc: git
Martin Koegler <mkoegler@auto.tuwien.ac.at> writes:
> Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
> ---
> commit.c | 28 +++++++++++++++++++++-------
> 1 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/commit.c b/commit.c
> index f074811..ffa0894 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -48,19 +48,33 @@ struct commit *lookup_commit(const unsigned char *sha1)
> return check_commit(obj, sha1, 0);
> }
>
> -static unsigned long parse_commit_date(const char *buf)
> +static unsigned long parse_commit_date(const char *buf, const char* tail)
Should be "const char *tail" in our codebase.
> {
> unsigned long date;
> + char datebuf[20];
> + unsigned long len;
>
> + if (buf + 6 >= tail)
> + return 0;
> if (memcmp(buf, "author", 6))
> return 0;
Even though buf, which is a result from read_sha1_file(), is
always terminated with an extra NUL (outside its object size),
if a bogus commit object ends with "author" (and without the
author information) this part will pass, and ...
> - while (*buf++ != '\n')
> + while (buf < tail && *buf++ != '\n')
> /* nada */;
> + if (buf + 9 >= tail)
> + return 0;
... you catch that here. That seems like a good change.
> if (memcmp(buf, "committer", 9))
> return 0;
> - while (*buf++ != '>')
> + while (buf < tail && *buf++ != '>')
> /* nada */;
> - date = strtoul(buf, NULL, 10);
> + if (buf >= tail)
> + return 0;
Likewise here.
> + len = tail - buf;
> + if (len > sizeof(datebuf) - 1)
> + len = sizeof(datebuf) - 1;
Broken indentation.
> + memcpy(datebuf, buf, len);
> + datebuf[len] = 0;
> + date = strtoul(datebuf, NULL, 10);
However, as long as buf at this point hasn't go beyond tail,
which you already checked, I think we can rely on strtoul()
stopping at the NUL at the end of buffer (that is one beyond
tail), without this extra memcpy(). Am I mistaken?
> @@ -236,9 +250,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
> return 0;
> item->object.parsed = 1;
> tail += size;
> - if (tail <= bufptr + 5 || memcmp(bufptr, "tree ", 5))
> + if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
> return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
> - if (tail <= bufptr + 45 || get_sha1_hex(bufptr + 5, parent) < 0)
> + if (get_sha1_hex(bufptr + 5, parent) < 0)
> return error("bad tree pointer in commit %s",
> sha1_to_hex(item->object.sha1));
> item->tree = lookup_tree(parent);
This hunk is logically a no-op but I like your version better.
It also makes sure tree object name is terminated with a LF.
> @@ -275,7 +289,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
> n_refs++;
> }
> }
> - item->date = parse_commit_date(bufptr);
> + item->date = parse_commit_date(bufptr, tail);
>
> if (track_object_refs) {
> unsigned i = 0;
> --
> 1.4.4.4
When already somewhat deep in the rc cycle, looking at a patch
from somebody who uses 1.4.4.4 makes me look at the patch a bit
more carefully than usual ;-)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-06 22:00 ` Junio C Hamano
@ 2008-01-07 7:40 ` Martin Koegler
0 siblings, 0 replies; 8+ messages in thread
From: Martin Koegler @ 2008-01-07 7:40 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sun, Jan 06, 2008 at 02:00:57PM -0800, Junio C Hamano wrote:
> Martin Koegler <mkoegler@auto.tuwien.ac.at> writes:
> > diff --git a/commit.c b/commit.c
> > index f074811..ffa0894 100644
> > --- a/commit.c
> > +++ b/commit.c
> > @@ -48,19 +48,33 @@ struct commit *lookup_commit(const unsigned char *sha1)
> > return check_commit(obj, sha1, 0);
> > }
> >
> > -static unsigned long parse_commit_date(const char *buf)
> > +static unsigned long parse_commit_date(const char *buf, const char* tail)
>
> Should be "const char *tail" in our codebase.
Will fix.
> > {
> > unsigned long date;
> > + char datebuf[20];
> > + unsigned long len;
> >
> > + if (buf + 6 >= tail)
> > + return 0;
> > if (memcmp(buf, "author", 6))
> > return 0;
>
> Even though buf, which is a result from read_sha1_file(), is
> always terminated with an extra NUL (outside its object size),
> if a bogus commit object ends with "author" (and without the
> author information) this part will pass, and ...
fsck_commit (builtin-fsck.c) does quite redundant checks (except that
it checks for author too). Should I make parse_commit fail, if there
is no author and commiter line? This way, we could remove
| if (memcmp(buffer, "tree ", 5))
| return objerror(&commit->object, "invalid format - expected 'tree' line");
| if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
| return objerror(&commit->object, "invalid 'tree' line format - bad sha1");
| buffer += 46;
| while (!memcmp(buffer, "parent ", 7)) {
| if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
| return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
| buffer += 48;
| }
| if (memcmp(buffer, "author ", 7))
| return objerror(&commit->object, "invalid format - expected 'author' line");
from fsck_commit.
> > - while (*buf++ != '\n')
> > + while (buf < tail && *buf++ != '\n')
> > /* nada */;
> > + if (buf + 9 >= tail)
> > + return 0;
>
> ... you catch that here. That seems like a good change.
>
> > if (memcmp(buf, "committer", 9))
> > return 0;
> > - while (*buf++ != '>')
> > + while (buf < tail && *buf++ != '>')
> > /* nada */;
> > - date = strtoul(buf, NULL, 10);
> > + if (buf >= tail)
> > + return 0;
>
> Likewise here.
>
> > + len = tail - buf;
> > + if (len > sizeof(datebuf) - 1)
> > + len = sizeof(datebuf) - 1;
>
> Broken indentation.
Will fix.
> > + memcpy(datebuf, buf, len);
> > + datebuf[len] = 0;
> > + date = strtoul(datebuf, NULL, 10);
>
> However, as long as buf at this point hasn't go beyond tail,
> which you already checked, I think we can rely on strtoul()
> stopping at the NUL at the end of buffer (that is one beyond
> tail), without this extra memcpy(). Am I mistaken?
No.
parse_commit_buffer is only called from parse_commit (safe, as it uses
a buffer by read_sha1_file) and parse_object_buffer (safe, as it is
called by parse_object (safe as it uses a buffer by read_sha1_file)
and get_obj (in builtin-for-each-ref.c, safe as it uses a buffer by
read_sha1_file)).
> > @@ -275,7 +289,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
> > n_refs++;
> > }
> > }
> > - item->date = parse_commit_date(bufptr);
> > + item->date = parse_commit_date(bufptr, tail);
> >
> > if (track_object_refs) {
> > unsigned i = 0;
> > --
> > 1.4.4.4
>
> When already somewhat deep in the rc cycle, looking at a patch
> from somebody who uses 1.4.4.4 makes me look at the patch a bit
> more carefully than usual ;-)
stg is much simpler to use for such patches. My distribution [Linus
already called it insafe for it's git version] ships stg 0.11, which
is sufficient for this. I simply use stg with the shipped git 1.4.4.4
to avoid any compatibiltiy problems.
mfg Martin Kögler
PS:
The function of get_obj in builtin-for-each-ref.c looks quite similar to
parse_object.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2008-01-20 16:11 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-14 21:20 [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
2008-01-15 7:32 ` Johannes Sixt
-- strict thread matches above, loose matches on Subject: below --
2008-01-19 17:35 Martin Koegler
2008-01-19 19:52 ` Junio C Hamano
2008-01-20 16:11 ` Martin Koegler
2008-01-06 19:03 [PATCH] parse_tag_buffer: don't parse invalid tags Martin Koegler
2008-01-06 19:03 ` [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
2008-01-06 22:00 ` Junio C Hamano
2008-01-07 7:40 ` Martin Koegler
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).