* [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
[not found] <20240730203914.1182569-1-andrii@kernel.org>
@ 2024-07-30 20:39 ` Andrii Nakryiko
2024-07-31 4:04 ` Andi Kleen
2024-08-07 15:11 ` Jann Horn
0 siblings, 2 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-07-30 20:39 UTC (permalink / raw)
To: bpf
Cc: linux-mm, akpm, adobriyan, shakeel.butt, hannes, ak, osandov,
song, jannh, Andrii Nakryiko, stable
Harden build ID parsing logic, adding explicit READ_ONCE() where it's
important to have a consistent value read and validated just once.
Fixes tag below points to the code that moved this code into
lib/buildid.c, and then subsequently was used in perf subsystem, making
this code exposed to perf_event_open() users in v5.12+.
Cc: stable@vger.kernel.org
Cc: Jann Horn <jannh@google.com>
Suggested-by: Andi Kleen <ak@linux.intel.com>
Fixes: bd7525dacd7e ("bpf: Move stack_map_get_build_id into lib")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
lib/buildid.c | 51 +++++++++++++++++++++++++++------------------------
1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/lib/buildid.c b/lib/buildid.c
index e02b5507418b..d21d86f6c19a 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -18,28 +18,29 @@ static int parse_build_id_buf(unsigned char *build_id,
const void *note_start,
Elf32_Word note_size)
{
+ const char note_name[] = "GNU";
+ const size_t note_name_sz = sizeof(note_name);
Elf32_Word note_offs = 0, new_offs;
+ u32 name_sz, desc_sz;
+ const char *data;
while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+ name_sz = READ_ONCE(nhdr->n_namesz);
+ desc_sz = READ_ONCE(nhdr->n_descsz);
if (nhdr->n_type == BUILD_ID &&
- nhdr->n_namesz == sizeof("GNU") &&
- !strcmp((char *)(nhdr + 1), "GNU") &&
- nhdr->n_descsz > 0 &&
- nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
- memcpy(build_id,
- note_start + note_offs +
- ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
- nhdr->n_descsz);
- memset(build_id + nhdr->n_descsz, 0,
- BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+ name_sz == note_name_sz &&
+ strcmp((char *)(nhdr + 1), note_name) == 0 &&
+ desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
+ data = note_start + note_offs + ALIGN(note_name_sz, 4);
+ memcpy(build_id, data, desc_sz);
+ memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
if (size)
- *size = nhdr->n_descsz;
+ *size = desc_sz;
return 0;
}
- new_offs = note_offs + sizeof(Elf32_Nhdr) +
- ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(name_sz, 4) + ALIGN(desc_sz, 4);
if (new_offs <= note_offs) /* overflow */
break;
note_offs = new_offs;
@@ -71,7 +72,7 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id,
{
Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
Elf32_Phdr *phdr;
- int i;
+ __u32 i, phnum;
/*
* FIXME
@@ -80,9 +81,10 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id,
*/
if (ehdr->e_phoff != sizeof(Elf32_Ehdr))
return -EINVAL;
+
+ phnum = READ_ONCE(ehdr->e_phnum);
/* only supports phdr that fits in one page */
- if (ehdr->e_phnum >
- (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+ if (phnum > (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
return -EINVAL;
phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
@@ -90,8 +92,8 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id,
for (i = 0; i < ehdr->e_phnum; ++i) {
if (phdr[i].p_type == PT_NOTE &&
!parse_build_id(page_addr, build_id, size,
- page_addr + phdr[i].p_offset,
- phdr[i].p_filesz))
+ page_addr + READ_ONCE(phdr[i].p_offset),
+ READ_ONCE(phdr[i].p_filesz)))
return 0;
}
return -EINVAL;
@@ -103,7 +105,7 @@ static int get_build_id_64(const void *page_addr, unsigned char *build_id,
{
Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
Elf64_Phdr *phdr;
- int i;
+ __u32 i, phnum;
/*
* FIXME
@@ -112,18 +114,19 @@ static int get_build_id_64(const void *page_addr, unsigned char *build_id,
*/
if (ehdr->e_phoff != sizeof(Elf64_Ehdr))
return -EINVAL;
+
+ phnum = READ_ONCE(ehdr->e_phnum);
/* only supports phdr that fits in one page */
- if (ehdr->e_phnum >
- (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+ if (phnum > (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
return -EINVAL;
phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
- for (i = 0; i < ehdr->e_phnum; ++i) {
+ for (i = 0; i < phnum; ++i) {
if (phdr[i].p_type == PT_NOTE &&
!parse_build_id(page_addr, build_id, size,
- page_addr + phdr[i].p_offset,
- phdr[i].p_filesz))
+ page_addr + READ_ONCE(phdr[i].p_offset),
+ READ_ONCE(phdr[i].p_filesz)))
return 0;
}
return -EINVAL;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
2024-07-30 20:39 ` [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic Andrii Nakryiko
@ 2024-07-31 4:04 ` Andi Kleen
2024-07-31 21:54 ` Andrii Nakryiko
2024-08-07 15:11 ` Jann Horn
1 sibling, 1 reply; 5+ messages in thread
From: Andi Kleen @ 2024-07-31 4:04 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: bpf, linux-mm, akpm, adobriyan, shakeel.butt, hannes, osandov,
song, jannh, stable
> while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
> Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
>
> + name_sz = READ_ONCE(nhdr->n_namesz);
> + desc_sz = READ_ONCE(nhdr->n_descsz);
> if (nhdr->n_type == BUILD_ID &&
> - nhdr->n_namesz == sizeof("GNU") &&
> - !strcmp((char *)(nhdr + 1), "GNU") &&
> - nhdr->n_descsz > 0 &&
> - nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> - memcpy(build_id,
> - note_start + note_offs +
> - ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> - nhdr->n_descsz);
> - memset(build_id + nhdr->n_descsz, 0,
> - BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> + name_sz == note_name_sz &&
> + strcmp((char *)(nhdr + 1), note_name) == 0 &&
Doesn't the strcmp need a boundary check to be inside note_size too?
Other it may read into the next page, which could be unmapped, causing a fault.
Given it's unlikely that this happen, and the end has guard pages,
but there are some users of set_memory_np.
You could just move the later checks earlier.
The rest looks good to me.
-Andi
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
2024-07-31 4:04 ` Andi Kleen
@ 2024-07-31 21:54 ` Andrii Nakryiko
0 siblings, 0 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-07-31 21:54 UTC (permalink / raw)
To: Andi Kleen
Cc: Andrii Nakryiko, bpf, linux-mm, akpm, adobriyan, shakeel.butt,
hannes, osandov, song, jannh, stable
On Tue, Jul 30, 2024 at 9:05 PM Andi Kleen <ak@linux.intel.com> wrote:
>
> > while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
> > Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
> >
> > + name_sz = READ_ONCE(nhdr->n_namesz);
> > + desc_sz = READ_ONCE(nhdr->n_descsz);
> > if (nhdr->n_type == BUILD_ID &&
> > - nhdr->n_namesz == sizeof("GNU") &&
> > - !strcmp((char *)(nhdr + 1), "GNU") &&
> > - nhdr->n_descsz > 0 &&
> > - nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> > - memcpy(build_id,
> > - note_start + note_offs +
> > - ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> > - nhdr->n_descsz);
> > - memset(build_id + nhdr->n_descsz, 0,
> > - BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> > + name_sz == note_name_sz &&
> > + strcmp((char *)(nhdr + 1), note_name) == 0 &&
>
> Doesn't the strcmp need a boundary check to be inside note_size too?
>
> Other it may read into the next page, which could be unmapped, causing a fault.
> Given it's unlikely that this happen, and the end has guard pages,
> but there are some users of set_memory_np.
>
> You could just move the later checks earlier.
Yep, good catch! I'll move the overflow check and will add a note_size
check to it, thanks!
>
> The rest looks good to me.
>
> -Andi
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
2024-07-30 20:39 ` [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic Andrii Nakryiko
2024-07-31 4:04 ` Andi Kleen
@ 2024-08-07 15:11 ` Jann Horn
2024-08-07 16:47 ` Andrii Nakryiko
1 sibling, 1 reply; 5+ messages in thread
From: Jann Horn @ 2024-08-07 15:11 UTC (permalink / raw)
To: Andrii Nakryiko, Matthew Wilcox, linux-fsdevel
Cc: bpf, linux-mm, akpm, adobriyan, shakeel.butt, hannes, ak, osandov,
song, stable
+Matthew and fsdevel list for pagecache question
On Tue, Jul 30, 2024 at 10:39 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> Harden build ID parsing logic, adding explicit READ_ONCE() where it's
> important to have a consistent value read and validated just once.
>
> Fixes tag below points to the code that moved this code into
> lib/buildid.c, and then subsequently was used in perf subsystem, making
> this code exposed to perf_event_open() users in v5.12+.
One thing that still seems dodgy to me with this patch applied is the
call from build_id_parse() to find_get_page(), followed by reading the
page contents. My understanding of the page cache (which might be
incorrect) is that find_get_page() can return a page whose contents
have not been initialized yet, and you're supposed to check for
PageUptodate() or something like that before reading from it.
Maybe Matthew can check if I understood that right?
Also, it might be a good idea to liberally spray READ_ONCE() around
all the remaining unannotated shared memory accesses in
build_id_parse(), get_build_id_32(), get_build_id_64() and
parse_build_id_buf().
> Cc: stable@vger.kernel.org
> Cc: Jann Horn <jannh@google.com>
> Suggested-by: Andi Kleen <ak@linux.intel.com>
> Fixes: bd7525dacd7e ("bpf: Move stack_map_get_build_id into lib")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
> lib/buildid.c | 51 +++++++++++++++++++++++++++------------------------
> 1 file changed, 27 insertions(+), 24 deletions(-)
>
> diff --git a/lib/buildid.c b/lib/buildid.c
> index e02b5507418b..d21d86f6c19a 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -18,28 +18,29 @@ static int parse_build_id_buf(unsigned char *build_id,
> const void *note_start,
> Elf32_Word note_size)
> {
> + const char note_name[] = "GNU";
> + const size_t note_name_sz = sizeof(note_name);
> Elf32_Word note_offs = 0, new_offs;
> + u32 name_sz, desc_sz;
> + const char *data;
>
> while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
> Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
>
> + name_sz = READ_ONCE(nhdr->n_namesz);
> + desc_sz = READ_ONCE(nhdr->n_descsz);
> if (nhdr->n_type == BUILD_ID &&
> - nhdr->n_namesz == sizeof("GNU") &&
> - !strcmp((char *)(nhdr + 1), "GNU") &&
> - nhdr->n_descsz > 0 &&
> - nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> - memcpy(build_id,
> - note_start + note_offs +
> - ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> - nhdr->n_descsz);
> - memset(build_id + nhdr->n_descsz, 0,
> - BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> + name_sz == note_name_sz &&
> + strcmp((char *)(nhdr + 1), note_name) == 0 &&
> + desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
> + data = note_start + note_offs + ALIGN(note_name_sz, 4);
I don't think we have any guarantee here that this addition won't
result in an OOB pointer?
> + memcpy(build_id, data, desc_sz);
I think this can access OOB data (because "data" can already be OOB
and because "desc_sz" hasn't been checked against the amount of
remaining space in the page).
> + memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
> if (size)
> - *size = nhdr->n_descsz;
> + *size = desc_sz;
> return 0;
> }
> - new_offs = note_offs + sizeof(Elf32_Nhdr) +
> - ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
> + new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(name_sz, 4) + ALIGN(desc_sz, 4);
> if (new_offs <= note_offs) /* overflow */
> break;
You check whether "new_offs" has wrapped here, but then on the next
loop iteration, you check for "note_offs + sizeof(Elf32_Nhdr) <
note_size". So if new_offs is 0xffffffff at this point, then I think
the overflow check here will be passed, the loop condition will be
true on 32-bit kernels (on 64-bit kernels it won't be because the
addition happens on 64-bit numbers thanks to sizeof()), and "nhdr"
will point in front of the note?
> note_offs = new_offs;
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
2024-08-07 15:11 ` Jann Horn
@ 2024-08-07 16:47 ` Andrii Nakryiko
0 siblings, 0 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-08-07 16:47 UTC (permalink / raw)
To: Jann Horn
Cc: Andrii Nakryiko, Matthew Wilcox, linux-fsdevel, bpf, linux-mm,
akpm, adobriyan, shakeel.butt, hannes, ak, osandov, song, stable
On Wed, Aug 7, 2024 at 8:12 AM Jann Horn <jannh@google.com> wrote:
>
> +Matthew and fsdevel list for pagecache question
>
> On Tue, Jul 30, 2024 at 10:39 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> > Harden build ID parsing logic, adding explicit READ_ONCE() where it's
> > important to have a consistent value read and validated just once.
> >
> > Fixes tag below points to the code that moved this code into
> > lib/buildid.c, and then subsequently was used in perf subsystem, making
> > this code exposed to perf_event_open() users in v5.12+.
>
> One thing that still seems dodgy to me with this patch applied is the
> call from build_id_parse() to find_get_page(), followed by reading the
> page contents. My understanding of the page cache (which might be
> incorrect) is that find_get_page() can return a page whose contents
> have not been initialized yet, and you're supposed to check for
> PageUptodate() or something like that before reading from it.
>
> Maybe Matthew can check if I understood that right?
>
>
> Also, it might be a good idea to liberally spray READ_ONCE() around
> all the remaining unannotated shared memory accesses in
> build_id_parse(), get_build_id_32(), get_build_id_64() and
> parse_build_id_buf().
Andi was against that, so I kept READ_ONCE() only where strictly
necessary, AFAICT.
>
> > Cc: stable@vger.kernel.org
> > Cc: Jann Horn <jannh@google.com>
> > Suggested-by: Andi Kleen <ak@linux.intel.com>
> > Fixes: bd7525dacd7e ("bpf: Move stack_map_get_build_id into lib")
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> > lib/buildid.c | 51 +++++++++++++++++++++++++++------------------------
> > 1 file changed, 27 insertions(+), 24 deletions(-)
> >
> > diff --git a/lib/buildid.c b/lib/buildid.c
> > index e02b5507418b..d21d86f6c19a 100644
> > --- a/lib/buildid.c
> > +++ b/lib/buildid.c
> > @@ -18,28 +18,29 @@ static int parse_build_id_buf(unsigned char *build_id,
> > const void *note_start,
> > Elf32_Word note_size)
> > {
> > + const char note_name[] = "GNU";
> > + const size_t note_name_sz = sizeof(note_name);
> > Elf32_Word note_offs = 0, new_offs;
> > + u32 name_sz, desc_sz;
> > + const char *data;
> >
> > while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
> > Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
> >
> > + name_sz = READ_ONCE(nhdr->n_namesz);
> > + desc_sz = READ_ONCE(nhdr->n_descsz);
> > if (nhdr->n_type == BUILD_ID &&
> > - nhdr->n_namesz == sizeof("GNU") &&
> > - !strcmp((char *)(nhdr + 1), "GNU") &&
> > - nhdr->n_descsz > 0 &&
> > - nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> > - memcpy(build_id,
> > - note_start + note_offs +
> > - ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> > - nhdr->n_descsz);
> > - memset(build_id + nhdr->n_descsz, 0,
> > - BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> > + name_sz == note_name_sz &&
> > + strcmp((char *)(nhdr + 1), note_name) == 0 &&
> > + desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
> > + data = note_start + note_offs + ALIGN(note_name_sz, 4);
>
> I don't think we have any guarantee here that this addition won't
> result in an OOB pointer?
>
> > + memcpy(build_id, data, desc_sz);
>
> I think this can access OOB data (because "data" can already be OOB
> and because "desc_sz" hasn't been checked against the amount of
> remaining space in the page).
Andi already pointed this out and I fixed it locally, thanks.
>
> > + memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
> > if (size)
> > - *size = nhdr->n_descsz;
> > + *size = desc_sz;
> > return 0;
> > }
> > - new_offs = note_offs + sizeof(Elf32_Nhdr) +
> > - ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
> > + new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(name_sz, 4) + ALIGN(desc_sz, 4);
> > if (new_offs <= note_offs) /* overflow */
> > break;
>
> You check whether "new_offs" has wrapped here, but then on the next
> loop iteration, you check for "note_offs + sizeof(Elf32_Nhdr) <
> note_size". So if new_offs is 0xffffffff at this point, then I think
> the overflow check here will be passed, the loop condition will be
> true on 32-bit kernels (on 64-bit kernels it won't be because the
> addition happens on 64-bit numbers thanks to sizeof()), and "nhdr"
> will point in front of the note?
Correct, and so I moved this new_offs calculation and overflow check
to the beginning of the loop, which I think should capture this issue.
For the while() condition itself I have:
if (check_add_overflow(note_offs, note_size, ¬e_end))
return -EINVAL;
while (note_offs < note_end - sizeof(Elf32_Nhdr) - note_name_sz) {
...
}
I'll try to post an updated version soon-ish, have been waiting for mm
folks feedback before posting a new version.
>
> > note_offs = new_offs;
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-08-07 16:47 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20240730203914.1182569-1-andrii@kernel.org>
2024-07-30 20:39 ` [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic Andrii Nakryiko
2024-07-31 4:04 ` Andi Kleen
2024-07-31 21:54 ` Andrii Nakryiko
2024-08-07 15:11 ` Jann Horn
2024-08-07 16:47 ` Andrii Nakryiko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox