* [PATCH] parse_tag_buffer: don't parse invalid tags
@ 2008-01-06 19:03 Martin Koegler
2008-01-06 19:03 ` [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
0 siblings, 1 reply; 4+ messages in thread
From: Martin Koegler @ 2008-01-06 19:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Martin Koegler
The current tag parsing code can access memory outside the tag buffer,
if \n are missing. This patch prevent this behaviour.
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
tag.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/tag.c b/tag.c
index f62bcdd..fa22ae6 100644
--- a/tag.c
+++ b/tag.c
@@ -39,6 +39,7 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
unsigned char sha1[20];
const char *type_line, *tag_line, *sig_line;
char type[20];
+ const char* start = data;
if (item->object.parsed)
return 0;
@@ -53,11 +54,11 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
if (memcmp("\ntype ", type_line-1, 6))
return -1;
- tag_line = strchr(type_line, '\n');
+ tag_line = memchr(type_line, '\n', size - (type_line - start));
if (!tag_line || memcmp("tag ", ++tag_line, 4))
return -1;
- sig_line = strchr(tag_line, '\n');
+ sig_line = memchr(tag_line, '\n', size - (tag_line - start));
if (!sig_line)
return -1;
sig_line++;
--
1.4.4.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-06 19:03 [PATCH] parse_tag_buffer: don't parse invalid tags Martin Koegler
@ 2008-01-06 19:03 ` Martin Koegler
2008-01-06 22:00 ` Junio C Hamano
0 siblings, 1 reply; 4+ messages in thread
From: Martin Koegler @ 2008-01-06 19:03 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Martin Koegler
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
commit.c | 28 +++++++++++++++++++++-------
1 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/commit.c b/commit.c
index f074811..ffa0894 100644
--- a/commit.c
+++ b/commit.c
@@ -48,19 +48,33 @@ struct commit *lookup_commit(const unsigned char *sha1)
return check_commit(obj, sha1, 0);
}
-static unsigned long parse_commit_date(const char *buf)
+static unsigned long parse_commit_date(const char *buf, const char* tail)
{
unsigned long date;
+ char datebuf[20];
+ unsigned long len;
+ if (buf + 6 >= tail)
+ return 0;
if (memcmp(buf, "author", 6))
return 0;
- while (*buf++ != '\n')
+ while (buf < tail && *buf++ != '\n')
/* nada */;
+ if (buf + 9 >= tail)
+ return 0;
if (memcmp(buf, "committer", 9))
return 0;
- while (*buf++ != '>')
+ while (buf < tail && *buf++ != '>')
/* nada */;
- date = strtoul(buf, NULL, 10);
+ if (buf >= tail)
+ return 0;
+
+ len = tail - buf;
+ if (len > sizeof(datebuf) - 1)
+ len = sizeof(datebuf) - 1;
+ memcpy(datebuf, buf, len);
+ datebuf[len] = 0;
+ date = strtoul(datebuf, NULL, 10);
if (date == ULONG_MAX)
date = 0;
return date;
@@ -236,9 +250,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
return 0;
item->object.parsed = 1;
tail += size;
- if (tail <= bufptr + 5 || memcmp(bufptr, "tree ", 5))
+ if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
- if (tail <= bufptr + 45 || get_sha1_hex(bufptr + 5, parent) < 0)
+ if (get_sha1_hex(bufptr + 5, parent) < 0)
return error("bad tree pointer in commit %s",
sha1_to_hex(item->object.sha1));
item->tree = lookup_tree(parent);
@@ -275,7 +289,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
n_refs++;
}
}
- item->date = parse_commit_date(bufptr);
+ item->date = parse_commit_date(bufptr, tail);
if (track_object_refs) {
unsigned i = 0;
--
1.4.4.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-06 19:03 ` [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
@ 2008-01-06 22:00 ` Junio C Hamano
2008-01-07 7:40 ` Martin Koegler
0 siblings, 1 reply; 4+ messages in thread
From: Junio C Hamano @ 2008-01-06 22:00 UTC (permalink / raw)
To: Martin Koegler; +Cc: git
Martin Koegler <mkoegler@auto.tuwien.ac.at> writes:
> Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
> ---
> commit.c | 28 +++++++++++++++++++++-------
> 1 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/commit.c b/commit.c
> index f074811..ffa0894 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -48,19 +48,33 @@ struct commit *lookup_commit(const unsigned char *sha1)
> return check_commit(obj, sha1, 0);
> }
>
> -static unsigned long parse_commit_date(const char *buf)
> +static unsigned long parse_commit_date(const char *buf, const char* tail)
Should be "const char *tail" in our codebase.
> {
> unsigned long date;
> + char datebuf[20];
> + unsigned long len;
>
> + if (buf + 6 >= tail)
> + return 0;
> if (memcmp(buf, "author", 6))
> return 0;
Even though buf, which is a result from read_sha1_file(), is
always terminated with an extra NUL (outside its object size),
if a bogus commit object ends with "author" (and without the
author information) this part will pass, and ...
> - while (*buf++ != '\n')
> + while (buf < tail && *buf++ != '\n')
> /* nada */;
> + if (buf + 9 >= tail)
> + return 0;
... you catch that here. That seems like a good change.
> if (memcmp(buf, "committer", 9))
> return 0;
> - while (*buf++ != '>')
> + while (buf < tail && *buf++ != '>')
> /* nada */;
> - date = strtoul(buf, NULL, 10);
> + if (buf >= tail)
> + return 0;
Likewise here.
> + len = tail - buf;
> + if (len > sizeof(datebuf) - 1)
> + len = sizeof(datebuf) - 1;
Broken indentation.
> + memcpy(datebuf, buf, len);
> + datebuf[len] = 0;
> + date = strtoul(datebuf, NULL, 10);
However, as long as buf at this point hasn't go beyond tail,
which you already checked, I think we can rely on strtoul()
stopping at the NUL at the end of buffer (that is one beyond
tail), without this extra memcpy(). Am I mistaken?
> @@ -236,9 +250,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
> return 0;
> item->object.parsed = 1;
> tail += size;
> - if (tail <= bufptr + 5 || memcmp(bufptr, "tree ", 5))
> + if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n')
> return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
> - if (tail <= bufptr + 45 || get_sha1_hex(bufptr + 5, parent) < 0)
> + if (get_sha1_hex(bufptr + 5, parent) < 0)
> return error("bad tree pointer in commit %s",
> sha1_to_hex(item->object.sha1));
> item->tree = lookup_tree(parent);
This hunk is logically a no-op but I like your version better.
It also makes sure tree object name is terminated with a LF.
> @@ -275,7 +289,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
> n_refs++;
> }
> }
> - item->date = parse_commit_date(bufptr);
> + item->date = parse_commit_date(bufptr, tail);
>
> if (track_object_refs) {
> unsigned i = 0;
> --
> 1.4.4.4
When already somewhat deep in the rc cycle, looking at a patch
from somebody who uses 1.4.4.4 makes me look at the patch a bit
more carefully than usual ;-)
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] parse_commit_buffer: don't parse invalid commits
2008-01-06 22:00 ` Junio C Hamano
@ 2008-01-07 7:40 ` Martin Koegler
0 siblings, 0 replies; 4+ messages in thread
From: Martin Koegler @ 2008-01-07 7:40 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Sun, Jan 06, 2008 at 02:00:57PM -0800, Junio C Hamano wrote:
> Martin Koegler <mkoegler@auto.tuwien.ac.at> writes:
> > diff --git a/commit.c b/commit.c
> > index f074811..ffa0894 100644
> > --- a/commit.c
> > +++ b/commit.c
> > @@ -48,19 +48,33 @@ struct commit *lookup_commit(const unsigned char *sha1)
> > return check_commit(obj, sha1, 0);
> > }
> >
> > -static unsigned long parse_commit_date(const char *buf)
> > +static unsigned long parse_commit_date(const char *buf, const char* tail)
>
> Should be "const char *tail" in our codebase.
Will fix.
> > {
> > unsigned long date;
> > + char datebuf[20];
> > + unsigned long len;
> >
> > + if (buf + 6 >= tail)
> > + return 0;
> > if (memcmp(buf, "author", 6))
> > return 0;
>
> Even though buf, which is a result from read_sha1_file(), is
> always terminated with an extra NUL (outside its object size),
> if a bogus commit object ends with "author" (and without the
> author information) this part will pass, and ...
fsck_commit (builtin-fsck.c) does quite redundant checks (except that
it checks for author too). Should I make parse_commit fail, if there
is no author and commiter line? This way, we could remove
| if (memcmp(buffer, "tree ", 5))
| return objerror(&commit->object, "invalid format - expected 'tree' line");
| if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
| return objerror(&commit->object, "invalid 'tree' line format - bad sha1");
| buffer += 46;
| while (!memcmp(buffer, "parent ", 7)) {
| if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
| return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
| buffer += 48;
| }
| if (memcmp(buffer, "author ", 7))
| return objerror(&commit->object, "invalid format - expected 'author' line");
from fsck_commit.
> > - while (*buf++ != '\n')
> > + while (buf < tail && *buf++ != '\n')
> > /* nada */;
> > + if (buf + 9 >= tail)
> > + return 0;
>
> ... you catch that here. That seems like a good change.
>
> > if (memcmp(buf, "committer", 9))
> > return 0;
> > - while (*buf++ != '>')
> > + while (buf < tail && *buf++ != '>')
> > /* nada */;
> > - date = strtoul(buf, NULL, 10);
> > + if (buf >= tail)
> > + return 0;
>
> Likewise here.
>
> > + len = tail - buf;
> > + if (len > sizeof(datebuf) - 1)
> > + len = sizeof(datebuf) - 1;
>
> Broken indentation.
Will fix.
> > + memcpy(datebuf, buf, len);
> > + datebuf[len] = 0;
> > + date = strtoul(datebuf, NULL, 10);
>
> However, as long as buf at this point hasn't go beyond tail,
> which you already checked, I think we can rely on strtoul()
> stopping at the NUL at the end of buffer (that is one beyond
> tail), without this extra memcpy(). Am I mistaken?
No.
parse_commit_buffer is only called from parse_commit (safe, as it uses
a buffer by read_sha1_file) and parse_object_buffer (safe, as it is
called by parse_object (safe as it uses a buffer by read_sha1_file)
and get_obj (in builtin-for-each-ref.c, safe as it uses a buffer by
read_sha1_file)).
> > @@ -275,7 +289,7 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
> > n_refs++;
> > }
> > }
> > - item->date = parse_commit_date(bufptr);
> > + item->date = parse_commit_date(bufptr, tail);
> >
> > if (track_object_refs) {
> > unsigned i = 0;
> > --
> > 1.4.4.4
>
> When already somewhat deep in the rc cycle, looking at a patch
> from somebody who uses 1.4.4.4 makes me look at the patch a bit
> more carefully than usual ;-)
stg is much simpler to use for such patches. My distribution [Linus
already called it insafe for it's git version] ships stg 0.11, which
is sufficient for this. I simply use stg with the shipped git 1.4.4.4
to avoid any compatibiltiy problems.
mfg Martin Kögler
PS:
The function of get_obj in builtin-for-each-ref.c looks quite similar to
parse_object.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2008-01-07 7:40 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-06 19:03 [PATCH] parse_tag_buffer: don't parse invalid tags Martin Koegler
2008-01-06 19:03 ` [PATCH] parse_commit_buffer: don't parse invalid commits Martin Koegler
2008-01-06 22:00 ` Junio C Hamano
2008-01-07 7:40 ` Martin Koegler
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).