Linux kernel -stable discussions
 help / color / mirror / Atom feed
* [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
       [not found] <20240730203914.1182569-1-andrii@kernel.org>
@ 2024-07-30 20:39 ` Andrii Nakryiko
  2024-07-31  4:04   ` Andi Kleen
  2024-08-07 15:11   ` Jann Horn
  0 siblings, 2 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-07-30 20:39 UTC (permalink / raw)
  To: bpf
  Cc: linux-mm, akpm, adobriyan, shakeel.butt, hannes, ak, osandov,
	song, jannh, Andrii Nakryiko, stable

Harden build ID parsing logic, adding explicit READ_ONCE() where it's
important to have a consistent value read and validated just once.

Fixes tag below points to the code that moved this code into
lib/buildid.c, and then subsequently was used in perf subsystem, making
this code exposed to perf_event_open() users in v5.12+.

Cc: stable@vger.kernel.org
Cc: Jann Horn <jannh@google.com>
Suggested-by: Andi Kleen <ak@linux.intel.com>
Fixes: bd7525dacd7e ("bpf: Move stack_map_get_build_id into lib")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 lib/buildid.c | 51 +++++++++++++++++++++++++++------------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/lib/buildid.c b/lib/buildid.c
index e02b5507418b..d21d86f6c19a 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -18,28 +18,29 @@ static int parse_build_id_buf(unsigned char *build_id,
 			      const void *note_start,
 			      Elf32_Word note_size)
 {
+	const char note_name[] = "GNU";
+	const size_t note_name_sz = sizeof(note_name);
 	Elf32_Word note_offs = 0, new_offs;
+	u32 name_sz, desc_sz;
+	const char *data;
 
 	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
 		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
 
+		name_sz = READ_ONCE(nhdr->n_namesz);
+		desc_sz = READ_ONCE(nhdr->n_descsz);
 		if (nhdr->n_type == BUILD_ID &&
-		    nhdr->n_namesz == sizeof("GNU") &&
-		    !strcmp((char *)(nhdr + 1), "GNU") &&
-		    nhdr->n_descsz > 0 &&
-		    nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
-			memcpy(build_id,
-			       note_start + note_offs +
-			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
-			       nhdr->n_descsz);
-			memset(build_id + nhdr->n_descsz, 0,
-			       BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+		    name_sz == note_name_sz &&
+		    strcmp((char *)(nhdr + 1), note_name) == 0 &&
+		    desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
+			data = note_start + note_offs + ALIGN(note_name_sz, 4);
+			memcpy(build_id, data, desc_sz);
+			memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
 			if (size)
-				*size = nhdr->n_descsz;
+				*size = desc_sz;
 			return 0;
 		}
-		new_offs = note_offs + sizeof(Elf32_Nhdr) +
-			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+		new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(name_sz, 4) + ALIGN(desc_sz, 4);
 		if (new_offs <= note_offs)  /* overflow */
 			break;
 		note_offs = new_offs;
@@ -71,7 +72,7 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id,
 {
 	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
 	Elf32_Phdr *phdr;
-	int i;
+	__u32 i, phnum;
 
 	/*
 	 * FIXME
@@ -80,9 +81,10 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id,
 	 */
 	if (ehdr->e_phoff != sizeof(Elf32_Ehdr))
 		return -EINVAL;
+
+	phnum = READ_ONCE(ehdr->e_phnum);
 	/* only supports phdr that fits in one page */
-	if (ehdr->e_phnum >
-	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+	if (phnum > (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
 		return -EINVAL;
 
 	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
@@ -90,8 +92,8 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id,
 	for (i = 0; i < ehdr->e_phnum; ++i) {
 		if (phdr[i].p_type == PT_NOTE &&
 		    !parse_build_id(page_addr, build_id, size,
-				    page_addr + phdr[i].p_offset,
-				    phdr[i].p_filesz))
+				    page_addr + READ_ONCE(phdr[i].p_offset),
+				    READ_ONCE(phdr[i].p_filesz)))
 			return 0;
 	}
 	return -EINVAL;
@@ -103,7 +105,7 @@ static int get_build_id_64(const void *page_addr, unsigned char *build_id,
 {
 	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
 	Elf64_Phdr *phdr;
-	int i;
+	__u32 i, phnum;
 
 	/*
 	 * FIXME
@@ -112,18 +114,19 @@ static int get_build_id_64(const void *page_addr, unsigned char *build_id,
 	 */
 	if (ehdr->e_phoff != sizeof(Elf64_Ehdr))
 		return -EINVAL;
+
+	phnum = READ_ONCE(ehdr->e_phnum);
 	/* only supports phdr that fits in one page */
-	if (ehdr->e_phnum >
-	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+	if (phnum > (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
 		return -EINVAL;
 
 	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
 
-	for (i = 0; i < ehdr->e_phnum; ++i) {
+	for (i = 0; i < phnum; ++i) {
 		if (phdr[i].p_type == PT_NOTE &&
 		    !parse_build_id(page_addr, build_id, size,
-				    page_addr + phdr[i].p_offset,
-				    phdr[i].p_filesz))
+				    page_addr + READ_ONCE(phdr[i].p_offset),
+				    READ_ONCE(phdr[i].p_filesz)))
 			return 0;
 	}
 	return -EINVAL;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
  2024-07-30 20:39 ` [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic Andrii Nakryiko
@ 2024-07-31  4:04   ` Andi Kleen
  2024-07-31 21:54     ` Andrii Nakryiko
  2024-08-07 15:11   ` Jann Horn
  1 sibling, 1 reply; 5+ messages in thread
From: Andi Kleen @ 2024-07-31  4:04 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, linux-mm, akpm, adobriyan, shakeel.butt, hannes, osandov,
	song, jannh, stable

>  	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
>  		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
>  
> +		name_sz = READ_ONCE(nhdr->n_namesz);
> +		desc_sz = READ_ONCE(nhdr->n_descsz);
>  		if (nhdr->n_type == BUILD_ID &&
> -		    nhdr->n_namesz == sizeof("GNU") &&
> -		    !strcmp((char *)(nhdr + 1), "GNU") &&
> -		    nhdr->n_descsz > 0 &&
> -		    nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> -			memcpy(build_id,
> -			       note_start + note_offs +
> -			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> -			       nhdr->n_descsz);
> -			memset(build_id + nhdr->n_descsz, 0,
> -			       BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> +		    name_sz == note_name_sz &&
> +		    strcmp((char *)(nhdr + 1), note_name) == 0 &&

Doesn't the strcmp need a boundary check to be inside note_size too?

Other it may read into the next page, which could be unmapped, causing a fault.
Given it's unlikely that this happen, and the end has guard pages,
but there are some users of set_memory_np.

You could just move the later checks earlier.

The rest looks good to me.

-Andi


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
  2024-07-31  4:04   ` Andi Kleen
@ 2024-07-31 21:54     ` Andrii Nakryiko
  0 siblings, 0 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-07-31 21:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrii Nakryiko, bpf, linux-mm, akpm, adobriyan, shakeel.butt,
	hannes, osandov, song, jannh, stable

On Tue, Jul 30, 2024 at 9:05 PM Andi Kleen <ak@linux.intel.com> wrote:
>
> >       while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
> >               Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
> >
> > +             name_sz = READ_ONCE(nhdr->n_namesz);
> > +             desc_sz = READ_ONCE(nhdr->n_descsz);
> >               if (nhdr->n_type == BUILD_ID &&
> > -                 nhdr->n_namesz == sizeof("GNU") &&
> > -                 !strcmp((char *)(nhdr + 1), "GNU") &&
> > -                 nhdr->n_descsz > 0 &&
> > -                 nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> > -                     memcpy(build_id,
> > -                            note_start + note_offs +
> > -                            ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> > -                            nhdr->n_descsz);
> > -                     memset(build_id + nhdr->n_descsz, 0,
> > -                            BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> > +                 name_sz == note_name_sz &&
> > +                 strcmp((char *)(nhdr + 1), note_name) == 0 &&
>
> Doesn't the strcmp need a boundary check to be inside note_size too?
>
> Other it may read into the next page, which could be unmapped, causing a fault.
> Given it's unlikely that this happen, and the end has guard pages,
> but there are some users of set_memory_np.
>
> You could just move the later checks earlier.

Yep, good catch! I'll move the overflow check and will add a note_size
check to it, thanks!

>
> The rest looks good to me.
>
> -Andi
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
  2024-07-30 20:39 ` [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic Andrii Nakryiko
  2024-07-31  4:04   ` Andi Kleen
@ 2024-08-07 15:11   ` Jann Horn
  2024-08-07 16:47     ` Andrii Nakryiko
  1 sibling, 1 reply; 5+ messages in thread
From: Jann Horn @ 2024-08-07 15:11 UTC (permalink / raw)
  To: Andrii Nakryiko, Matthew Wilcox, linux-fsdevel
  Cc: bpf, linux-mm, akpm, adobriyan, shakeel.butt, hannes, ak, osandov,
	song, stable

+Matthew and fsdevel list for pagecache question

On Tue, Jul 30, 2024 at 10:39 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> Harden build ID parsing logic, adding explicit READ_ONCE() where it's
> important to have a consistent value read and validated just once.
>
> Fixes tag below points to the code that moved this code into
> lib/buildid.c, and then subsequently was used in perf subsystem, making
> this code exposed to perf_event_open() users in v5.12+.

One thing that still seems dodgy to me with this patch applied is the
call from build_id_parse() to find_get_page(), followed by reading the
page contents. My understanding of the page cache (which might be
incorrect) is that find_get_page() can return a page whose contents
have not been initialized yet, and you're supposed to check for
PageUptodate() or something like that before reading from it.

Maybe Matthew can check if I understood that right?


Also, it might be a good idea to liberally spray READ_ONCE() around
all the remaining unannotated shared memory accesses in
build_id_parse(), get_build_id_32(), get_build_id_64() and
parse_build_id_buf().

> Cc: stable@vger.kernel.org
> Cc: Jann Horn <jannh@google.com>
> Suggested-by: Andi Kleen <ak@linux.intel.com>
> Fixes: bd7525dacd7e ("bpf: Move stack_map_get_build_id into lib")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  lib/buildid.c | 51 +++++++++++++++++++++++++++------------------------
>  1 file changed, 27 insertions(+), 24 deletions(-)
>
> diff --git a/lib/buildid.c b/lib/buildid.c
> index e02b5507418b..d21d86f6c19a 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -18,28 +18,29 @@ static int parse_build_id_buf(unsigned char *build_id,
>                               const void *note_start,
>                               Elf32_Word note_size)
>  {
> +       const char note_name[] = "GNU";
> +       const size_t note_name_sz = sizeof(note_name);
>         Elf32_Word note_offs = 0, new_offs;
> +       u32 name_sz, desc_sz;
> +       const char *data;
>
>         while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
>                 Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
>
> +               name_sz = READ_ONCE(nhdr->n_namesz);
> +               desc_sz = READ_ONCE(nhdr->n_descsz);
>                 if (nhdr->n_type == BUILD_ID &&
> -                   nhdr->n_namesz == sizeof("GNU") &&
> -                   !strcmp((char *)(nhdr + 1), "GNU") &&
> -                   nhdr->n_descsz > 0 &&
> -                   nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> -                       memcpy(build_id,
> -                              note_start + note_offs +
> -                              ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> -                              nhdr->n_descsz);
> -                       memset(build_id + nhdr->n_descsz, 0,
> -                              BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> +                   name_sz == note_name_sz &&
> +                   strcmp((char *)(nhdr + 1), note_name) == 0 &&
> +                   desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
> +                       data = note_start + note_offs + ALIGN(note_name_sz, 4);

I don't think we have any guarantee here that this addition won't
result in an OOB pointer?

> +                       memcpy(build_id, data, desc_sz);

I think this can access OOB data (because "data" can already be OOB
and because "desc_sz" hasn't been checked against the amount of
remaining space in the page).

> +                       memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
>                         if (size)
> -                               *size = nhdr->n_descsz;
> +                               *size = desc_sz;
>                         return 0;
>                 }
> -               new_offs = note_offs + sizeof(Elf32_Nhdr) +
> -                       ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
> +               new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(name_sz, 4) + ALIGN(desc_sz, 4);
>                 if (new_offs <= note_offs)  /* overflow */
>                         break;

You check whether "new_offs" has wrapped here, but then on the next
loop iteration, you check for "note_offs + sizeof(Elf32_Nhdr) <
note_size". So if new_offs is 0xffffffff at this point, then I think
the overflow check here will be passed, the loop condition will be
true on 32-bit kernels (on 64-bit kernels it won't be because the
addition happens on 64-bit numbers thanks to sizeof()), and "nhdr"
will point in front of the note?

>                 note_offs = new_offs;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic
  2024-08-07 15:11   ` Jann Horn
@ 2024-08-07 16:47     ` Andrii Nakryiko
  0 siblings, 0 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-08-07 16:47 UTC (permalink / raw)
  To: Jann Horn
  Cc: Andrii Nakryiko, Matthew Wilcox, linux-fsdevel, bpf, linux-mm,
	akpm, adobriyan, shakeel.butt, hannes, ak, osandov, song, stable

On Wed, Aug 7, 2024 at 8:12 AM Jann Horn <jannh@google.com> wrote:
>
> +Matthew and fsdevel list for pagecache question
>
> On Tue, Jul 30, 2024 at 10:39 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> > Harden build ID parsing logic, adding explicit READ_ONCE() where it's
> > important to have a consistent value read and validated just once.
> >
> > Fixes tag below points to the code that moved this code into
> > lib/buildid.c, and then subsequently was used in perf subsystem, making
> > this code exposed to perf_event_open() users in v5.12+.
>
> One thing that still seems dodgy to me with this patch applied is the
> call from build_id_parse() to find_get_page(), followed by reading the
> page contents. My understanding of the page cache (which might be
> incorrect) is that find_get_page() can return a page whose contents
> have not been initialized yet, and you're supposed to check for
> PageUptodate() or something like that before reading from it.
>
> Maybe Matthew can check if I understood that right?
>
>
> Also, it might be a good idea to liberally spray READ_ONCE() around
> all the remaining unannotated shared memory accesses in
> build_id_parse(), get_build_id_32(), get_build_id_64() and
> parse_build_id_buf().

Andi was against that, so I kept READ_ONCE() only where strictly
necessary, AFAICT.

>
> > Cc: stable@vger.kernel.org
> > Cc: Jann Horn <jannh@google.com>
> > Suggested-by: Andi Kleen <ak@linux.intel.com>
> > Fixes: bd7525dacd7e ("bpf: Move stack_map_get_build_id into lib")
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  lib/buildid.c | 51 +++++++++++++++++++++++++++------------------------
> >  1 file changed, 27 insertions(+), 24 deletions(-)
> >
> > diff --git a/lib/buildid.c b/lib/buildid.c
> > index e02b5507418b..d21d86f6c19a 100644
> > --- a/lib/buildid.c
> > +++ b/lib/buildid.c
> > @@ -18,28 +18,29 @@ static int parse_build_id_buf(unsigned char *build_id,
> >                               const void *note_start,
> >                               Elf32_Word note_size)
> >  {
> > +       const char note_name[] = "GNU";
> > +       const size_t note_name_sz = sizeof(note_name);
> >         Elf32_Word note_offs = 0, new_offs;
> > +       u32 name_sz, desc_sz;
> > +       const char *data;
> >
> >         while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
> >                 Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
> >
> > +               name_sz = READ_ONCE(nhdr->n_namesz);
> > +               desc_sz = READ_ONCE(nhdr->n_descsz);
> >                 if (nhdr->n_type == BUILD_ID &&
> > -                   nhdr->n_namesz == sizeof("GNU") &&
> > -                   !strcmp((char *)(nhdr + 1), "GNU") &&
> > -                   nhdr->n_descsz > 0 &&
> > -                   nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
> > -                       memcpy(build_id,
> > -                              note_start + note_offs +
> > -                              ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
> > -                              nhdr->n_descsz);
> > -                       memset(build_id + nhdr->n_descsz, 0,
> > -                              BUILD_ID_SIZE_MAX - nhdr->n_descsz);
> > +                   name_sz == note_name_sz &&
> > +                   strcmp((char *)(nhdr + 1), note_name) == 0 &&
> > +                   desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
> > +                       data = note_start + note_offs + ALIGN(note_name_sz, 4);
>
> I don't think we have any guarantee here that this addition won't
> result in an OOB pointer?
>
> > +                       memcpy(build_id, data, desc_sz);
>
> I think this can access OOB data (because "data" can already be OOB
> and because "desc_sz" hasn't been checked against the amount of
> remaining space in the page).

Andi already pointed this out and I fixed it locally, thanks.

>
> > +                       memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
> >                         if (size)
> > -                               *size = nhdr->n_descsz;
> > +                               *size = desc_sz;
> >                         return 0;
> >                 }
> > -               new_offs = note_offs + sizeof(Elf32_Nhdr) +
> > -                       ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
> > +               new_offs = note_offs + sizeof(Elf32_Nhdr) + ALIGN(name_sz, 4) + ALIGN(desc_sz, 4);
> >                 if (new_offs <= note_offs)  /* overflow */
> >                         break;
>
> You check whether "new_offs" has wrapped here, but then on the next
> loop iteration, you check for "note_offs + sizeof(Elf32_Nhdr) <
> note_size". So if new_offs is 0xffffffff at this point, then I think
> the overflow check here will be passed, the loop condition will be
> true on 32-bit kernels (on 64-bit kernels it won't be because the
> addition happens on 64-bit numbers thanks to sizeof()), and "nhdr"
> will point in front of the note?

Correct, and so I moved this new_offs calculation and overflow check
to the beginning of the loop, which I think should capture this issue.

For the while() condition itself I have:

        if (check_add_overflow(note_offs, note_size, &note_end))
                return -EINVAL;

        while (note_offs < note_end - sizeof(Elf32_Nhdr) - note_name_sz) {
            ...
        }

I'll try to post an updated version soon-ish, have been waiting for mm
folks feedback before posting a new version.

>
> >                 note_offs = new_offs;

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-08-07 16:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20240730203914.1182569-1-andrii@kernel.org>
2024-07-30 20:39 ` [PATCH v3 bpf-next 01/10] lib/buildid: harden build ID parsing logic Andrii Nakryiko
2024-07-31  4:04   ` Andi Kleen
2024-07-31 21:54     ` Andrii Nakryiko
2024-08-07 15:11   ` Jann Horn
2024-08-07 16:47     ` Andrii Nakryiko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox