linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] uprobes: use vm_special_mapping close() functionality
       [not found] <CAHk-=wjD0XLhkzou89J-TK=L6B88pFoNYxN1uTWRQB3U5Czywg@mail.gmail.com>
@ 2024-09-03  7:36 ` Sven Schnelle
  2024-09-03  7:49   ` Sven Schnelle
                     ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Sven Schnelle @ 2024-09-03  7:36 UTC (permalink / raw)
  To: Michael Ellerman, Masami Hiramatsu, Oleg Nesterov, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan
  Cc: Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

The following KASAN splat was shown:

[   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
[   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
[   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
[   44.505479]
[   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
[   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
[   44.505508] Call Trace:
[   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
[   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
[   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
[   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
[   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
[   44.505550]  [<000b0324d12c7978>] remove_vma+0x78/0x120
[   44.505557]  [<000b0324d128a2c6>] exit_mmap+0x326/0x750
[   44.505563]  [<000b0324d0ba655a>] __mmput+0x9a/0x370
[   44.505570]  [<000b0324d0bbfbe0>] exit_mm+0x240/0x340
[   44.505575]  [<000b0324d0bc0228>] do_exit+0x548/0xd70
[   44.505580]  [<000b0324d0bc1102>] do_group_exit+0x132/0x390
[   44.505586]  [<000b0324d0bc13b6>] __s390x_sys_exit_group+0x56/0x60
[   44.505592]  [<000b0324d0adcbd6>] do_syscall+0x2f6/0x430
[   44.505599]  [<000b0324d2f78434>] __do_syscall+0xa4/0x170
[   44.505606]  [<000b0324d2f9454c>] system_call+0x74/0x98
[   44.505614]
[   44.505616] Allocated by task 1384:
[   44.505621]  kasan_save_stack+0x40/0x70
[   44.505630]  kasan_save_track+0x28/0x40
[   44.505636]  __kasan_kmalloc+0xa0/0xc0
[   44.505642]  __create_xol_area+0xfa/0x410
[   44.505648]  get_xol_area+0xb0/0xf0
[   44.505652]  uprobe_notify_resume+0x27a/0x470
[   44.505657]  irqentry_exit_to_user_mode+0x15e/0x1d0
[   44.505664]  pgm_check_handler+0x122/0x170
[   44.505670]
[   44.505672] Freed by task 1384:
[   44.505676]  kasan_save_stack+0x40/0x70
[   44.505682]  kasan_save_track+0x28/0x40
[   44.505687]  kasan_save_free_info+0x4a/0x70
[   44.505693]  __kasan_slab_free+0x5a/0x70
[   44.505698]  kfree+0xe8/0x3f0
[   44.505704]  __mmput+0x20/0x370
[   44.505709]  exit_mm+0x240/0x340
[   44.505713]  do_exit+0x548/0xd70
[   44.505718]  do_group_exit+0x132/0x390
[   44.505722]  __s390x_sys_exit_group+0x56/0x60
[   44.505727]  do_syscall+0x2f6/0x430
[   44.505732]  __do_syscall+0xa4/0x170
[   44.505738]  system_call+0x74/0x98

The problem is that uprobe_clear_state() kfree's struct xol_area, which
contains struct vm_special_mapping *xol_mapping. This one is passed to
_install_special_mapping() in xol_add_vma().
__mput reads:

static inline void __mmput(struct mm_struct *mm)
{
        VM_BUG_ON(atomic_read(&mm->mm_users));

        uprobe_clear_state(mm);
        exit_aio(mm);
        ksm_exit(mm);
        khugepaged_exit(mm); /* must run before exit_mmap */
        exit_mmap(mm);
        ...
}

So uprobe_clear_state() in the beginning free's the memory area
containing the vm_special_mapping data, but exit_mmap() uses this
address later via vma->vm_private_data (which was set in
_install_special_mapping().

Fix this by moving uprobe_clear_state() to uprobes.c and use it as
close() callback.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
---
 include/linux/uprobes.h |  1 -
 kernel/events/uprobes.c | 39 ++++++++++++++++++++-------------------
 kernel/fork.c           |  1 -
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f50df1fa93e7..2ae96c98d287 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -128,7 +128,6 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
-extern void uprobe_clear_state(struct mm_struct *mm);
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
 extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 30348f13d4a7..ab19a43a4dfc 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1463,6 +1463,25 @@ void * __weak arch_uprobe_trampoline(unsigned long *psize)
 	return &insn;
 }
 
+/*
+ * uprobe_clear_state - Free the area allocated for slots.
+ */
+static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
+{
+	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
+
+	mutex_lock(&delayed_uprobe_lock);
+	delayed_uprobe_remove(NULL, vma->vm_mm);
+	mutex_unlock(&delayed_uprobe_lock);
+
+	if (!area)
+		return;
+
+	put_page(area->pages[0]);
+	kfree(area->bitmap);
+	kfree(area);
+}
+
 static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
@@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 
 	area->xol_mapping.name = "[uprobes]";
 	area->xol_mapping.fault = NULL;
+	area->xol_mapping.close = uprobe_clear_state;
 	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
@@ -1526,25 +1546,6 @@ static struct xol_area *get_xol_area(void)
 	return area;
 }
 
-/*
- * uprobe_clear_state - Free the area allocated for slots.
- */
-void uprobe_clear_state(struct mm_struct *mm)
-{
-	struct xol_area *area = mm->uprobes_state.xol_area;
-
-	mutex_lock(&delayed_uprobe_lock);
-	delayed_uprobe_remove(NULL, mm);
-	mutex_unlock(&delayed_uprobe_lock);
-
-	if (!area)
-		return;
-
-	put_page(area->pages[0]);
-	kfree(area->bitmap);
-	kfree(area);
-}
-
 void uprobe_start_dup_mmap(void)
 {
 	percpu_down_read(&dup_mmap_sem);
diff --git a/kernel/fork.c b/kernel/fork.c
index df8e4575ff01..ad0e16cf528b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1340,7 +1340,6 @@ static inline void __mmput(struct mm_struct *mm)
 {
 	VM_BUG_ON(atomic_read(&mm->mm_users));
 
-	uprobe_clear_state(mm);
 	exit_aio(mm);
 	ksm_exit(mm);
 	khugepaged_exit(mm); /* must run before exit_mmap */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03  7:36 ` [PATCH] uprobes: use vm_special_mapping close() functionality Sven Schnelle
@ 2024-09-03  7:49   ` Sven Schnelle
  2024-09-04  3:57     ` Michael Ellerman
  2024-09-03  9:08   ` Oleg Nesterov
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 18+ messages in thread
From: Sven Schnelle @ 2024-09-03  7:49 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Masami Hiramatsu, Oleg Nesterov, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

Hi Michael,

Sven Schnelle <svens@linux.ibm.com> writes:

> The following KASAN splat was shown:
>
> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
> [   44.505479]
> [   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
> [   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
> [   44.505508] Call Trace:
> [   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
> [   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
> [   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
> [   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
> [   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
> [   44.505550]  [<000b0324d12c7978>] remove_vma+0x78/0x120
> [   44.505557]  [<000b0324d128a2c6>] exit_mmap+0x326/0x750
> [   44.505563]  [<000b0324d0ba655a>] __mmput+0x9a/0x370
> [   44.505570]  [<000b0324d0bbfbe0>] exit_mm+0x240/0x340
> [   44.505575]  [<000b0324d0bc0228>] do_exit+0x548/0xd70
> [   44.505580]  [<000b0324d0bc1102>] do_group_exit+0x132/0x390
> [   44.505586]  [<000b0324d0bc13b6>] __s390x_sys_exit_group+0x56/0x60
> [   44.505592]  [<000b0324d0adcbd6>] do_syscall+0x2f6/0x430
> [   44.505599]  [<000b0324d2f78434>] __do_syscall+0xa4/0x170
> [   44.505606]  [<000b0324d2f9454c>] system_call+0x74/0x98
> [   44.505614]
> [   44.505616] Allocated by task 1384:
> [   44.505621]  kasan_save_stack+0x40/0x70
> [   44.505630]  kasan_save_track+0x28/0x40
> [   44.505636]  __kasan_kmalloc+0xa0/0xc0
> [   44.505642]  __create_xol_area+0xfa/0x410
> [   44.505648]  get_xol_area+0xb0/0xf0
> [   44.505652]  uprobe_notify_resume+0x27a/0x470
> [   44.505657]  irqentry_exit_to_user_mode+0x15e/0x1d0
> [   44.505664]  pgm_check_handler+0x122/0x170
> [   44.505670]
> [   44.505672] Freed by task 1384:
> [   44.505676]  kasan_save_stack+0x40/0x70
> [   44.505682]  kasan_save_track+0x28/0x40
> [   44.505687]  kasan_save_free_info+0x4a/0x70
> [   44.505693]  __kasan_slab_free+0x5a/0x70
> [   44.505698]  kfree+0xe8/0x3f0
> [   44.505704]  __mmput+0x20/0x370
> [   44.505709]  exit_mm+0x240/0x340
> [   44.505713]  do_exit+0x548/0xd70
> [   44.505718]  do_group_exit+0x132/0x390
> [   44.505722]  __s390x_sys_exit_group+0x56/0x60
> [   44.505727]  do_syscall+0x2f6/0x430
> [   44.505732]  __do_syscall+0xa4/0x170
> [   44.505738]  system_call+0x74/0x98
> [..]

As this has a dependency on your special mapping close series, do you
want to carry that with your patches?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03  7:36 ` [PATCH] uprobes: use vm_special_mapping close() functionality Sven Schnelle
  2024-09-03  7:49   ` Sven Schnelle
@ 2024-09-03  9:08   ` Oleg Nesterov
  2024-09-03  9:32     ` Sven Schnelle
  2024-09-03 19:12     ` Linus Torvalds
  2024-09-11  9:44   ` Oleg Nesterov
  2024-09-11 13:13   ` [PATCH -mm 1/3] Revert "uprobes: use vm_special_mapping close() functionality" Oleg Nesterov
  3 siblings, 2 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-03  9:08 UTC (permalink / raw)
  To: Sven Schnelle
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

On 09/03, Sven Schnelle wrote:
>
> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
> [   44.505479]
> [   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
> [   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
> [   44.505508] Call Trace:
> [   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
> [   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
> [   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
> [   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
> [   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
                                       ^^^^^^^^^^^^^^^^^^^^^
Caused by

	[PATCH v2 1/4] mm: Add optional close() to struct vm_special_mapping
	https://lore.kernel.org/all/20240812082605.743814-1-mpe@ellerman.id.au/

?

> +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> +{
> +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> +
> +	mutex_lock(&delayed_uprobe_lock);
> +	delayed_uprobe_remove(NULL, vma->vm_mm);
> +	mutex_unlock(&delayed_uprobe_lock);
> +
> +	if (!area)
> +		return;
> +
> +	put_page(area->pages[0]);
> +	kfree(area->bitmap);
> +	kfree(area);
> +}
> +
>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>  {
>  	struct mm_struct *mm = current->mm;
> @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>
>  	area->xol_mapping.name = "[uprobes]";
>  	area->xol_mapping.fault = NULL;
> +	area->xol_mapping.close = uprobe_clear_state;

LGTM.

but with or without this fix __create_xol_area() also needs

	area->xol_mapping.mremap = NULL;

?

And in the longer term I think we should probably add a single instance
of "struct vm_special_mapping uprobe_special_mapping with ->fault != NULL
but this is another issue.

Oleg.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03  9:08   ` Oleg Nesterov
@ 2024-09-03  9:32     ` Sven Schnelle
  2024-09-03 19:12     ` Linus Torvalds
  1 sibling, 0 replies; 18+ messages in thread
From: Sven Schnelle @ 2024-09-03  9:32 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

Oleg Nesterov <oleg@redhat.com> writes:

> On 09/03, Sven Schnelle wrote:
>>
>> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
>> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
>> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
>> [   44.505479]
>> [   44.505486] CPU: 51 UID: 0 PID: 1384 Comm: sh Not tainted 6.11.0-rc6-next-20240902-dirty #1496
>> [   44.505503] Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
>> [   44.505508] Call Trace:
>> [   44.505511]  [<000b0324d2f78080>] dump_stack_lvl+0xd0/0x108
>> [   44.505521]  [<000b0324d2f5435c>] print_address_description.constprop.0+0x34/0x2e0
>> [   44.505529]  [<000b0324d2f5464c>] print_report+0x44/0x138
>> [   44.505536]  [<000b0324d1383192>] kasan_report+0xc2/0x140
>> [   44.505543]  [<000b0324d2f52904>] special_mapping_close+0x9c/0xc8
>                                        ^^^^^^^^^^^^^^^^^^^^^
> Caused by
>
> 	[PATCH v2 1/4] mm: Add optional close() to struct vm_special_mapping
> 	https://lore.kernel.org/all/20240812082605.743814-1-mpe@ellerman.id.au/
>
> ?
>
>> +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
>> +{
>> +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
>> +
>> +	mutex_lock(&delayed_uprobe_lock);
>> +	delayed_uprobe_remove(NULL, vma->vm_mm);
>> +	mutex_unlock(&delayed_uprobe_lock);
>> +
>> +	if (!area)
>> +		return;
>> +
>> +	put_page(area->pages[0]);
>> +	kfree(area->bitmap);
>> +	kfree(area);
>> +}
>> +
>>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>>  {
>>  	struct mm_struct *mm = current->mm;
>> @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>>
>>  	area->xol_mapping.name = "[uprobes]";
>>  	area->xol_mapping.fault = NULL;
>> +	area->xol_mapping.close = uprobe_clear_state;
>
> LGTM.
>
> but with or without this fix __create_xol_area() also needs
>
> 	area->xol_mapping.mremap = NULL;
>
> ?

I think the code should just use kzalloc() instead of kmalloc() so all
members are cleared. I noticed that when looking into the issue because
the new close member of xol_mapping wasn't initialized. My plan was to
send that change as a separate patch - because it's not related to this
issue.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03  9:08   ` Oleg Nesterov
  2024-09-03  9:32     ` Sven Schnelle
@ 2024-09-03 19:12     ` Linus Torvalds
  2024-09-03 19:31       ` Sven Schnelle
                         ` (3 more replies)
  1 sibling, 4 replies; 18+ messages in thread
From: Linus Torvalds @ 2024-09-03 19:12 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Sven Schnelle, Michael Ellerman, Masami Hiramatsu, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, linux-kernel, linux-trace-kernel, linux-perf-users

On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
>
> but with or without this fix __create_xol_area() also needs
>
>         area->xol_mapping.mremap = NULL;

I think the whole thing needs to be zeroed out.

It was always horribly buggy. The close thing just made it more
*obviously* buggy, because closing a vma is a lot more common than
mremap'ing it.

Either use kzalloc(), or do a proper initializer something like this:

-       area->xol_mapping.name = "[uprobes]";
-       area->xol_mapping.fault = NULL;
-       area->xol_mapping.pages = area->pages;
+       area->xol_mapping = (struct vm_special_mapping) {
+               .name = "[uprobes]",
+               .pages = area->pages,
+               .close = uprobe_clear_state,
+       };

which should initialize the struct vm_special_mapping fully.

                     Linus

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03 19:12     ` Linus Torvalds
@ 2024-09-03 19:31       ` Sven Schnelle
  2024-09-03 19:34         ` Linus Torvalds
  2024-09-03 19:32       ` Oleg Nesterov
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 18+ messages in thread
From: Sven Schnelle @ 2024-09-03 19:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Oleg Nesterov, Michael Ellerman, Masami Hiramatsu, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, linux-kernel, linux-trace-kernel, linux-perf-users

Hi Linus,

Linus Torvalds <torvalds@linux-foundation.org> writes:

> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
>>
>> but with or without this fix __create_xol_area() also needs
>>
>>         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.
>
> It was always horribly buggy. The close thing just made it more
> *obviously* buggy, because closing a vma is a lot more common than
> mremap'ing it.
>
> Either use kzalloc(), or do a proper initializer something like this:

I sent a patch which does this today:

https://lore.kernel.org/all/20240903102313.3402529-1-svens@linux.ibm.com/

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03 19:12     ` Linus Torvalds
  2024-09-03 19:31       ` Sven Schnelle
@ 2024-09-03 19:32       ` Oleg Nesterov
  2024-09-04  9:56       ` Oleg Nesterov
  2024-09-04 10:03       ` Oleg Nesterov
  3 siblings, 0 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-03 19:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Sven Schnelle, Michael Ellerman, Masami Hiramatsu, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, linux-kernel, linux-trace-kernel, linux-perf-users

On 09/03, Linus Torvalds wrote:
>
> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > but with or without this fix __create_xol_area() also needs
> >
> >         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.

Agreed,

> Either use kzalloc(),

This is what Sven did in

	[PATCH] uprobes: use kzalloc to allocate xol area
	https://lore.kernel.org/all/20240903102313.3402529-1-svens@linux.ibm.com/

Oleg.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03 19:31       ` Sven Schnelle
@ 2024-09-03 19:34         ` Linus Torvalds
  0 siblings, 0 replies; 18+ messages in thread
From: Linus Torvalds @ 2024-09-03 19:34 UTC (permalink / raw)
  To: Sven Schnelle
  Cc: Oleg Nesterov, Michael Ellerman, Masami Hiramatsu, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, linux-kernel, linux-trace-kernel, linux-perf-users

On Tue, 3 Sept 2024 at 12:31, Sven Schnelle <svens@linux.ibm.com> wrote:
>
> > Either use kzalloc(), or do a proper initializer something like this:
>
> I sent a patch which does this today:

Ack, looks good.

This should probably be backported independently of the new close()
series, since the mremap issue seems to be a longstanding issue.

                     Linus

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03  7:49   ` Sven Schnelle
@ 2024-09-04  3:57     ` Michael Ellerman
  2024-09-04 21:26       ` Andrew Morton
  0 siblings, 1 reply; 18+ messages in thread
From: Michael Ellerman @ 2024-09-04  3:57 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Masami Hiramatsu, Oleg Nesterov, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

Sven Schnelle <svens@linux.ibm.com> writes:
> Hi Michael,

Hi Sven,

> Sven Schnelle <svens@linux.ibm.com> writes:
>
>> The following KASAN splat was shown:
>>
>> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
>> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
>> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
...
>> [..]
>
> As this has a dependency on your special mapping close series, do you
> want to carry that with your patches?

Andrew has my series in mm-stable, so I think this should go into mm as
well. I assume he will pick it up.

cheers

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03 19:12     ` Linus Torvalds
  2024-09-03 19:31       ` Sven Schnelle
  2024-09-03 19:32       ` Oleg Nesterov
@ 2024-09-04  9:56       ` Oleg Nesterov
  2024-09-04 10:03       ` Oleg Nesterov
  3 siblings, 0 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-04  9:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Sven Schnelle, Michael Ellerman, Masami Hiramatsu, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, linux-kernel, linux-trace-kernel, linux-perf-users

I didn't have time to write a full reply yesterday.

On 09/03, Linus Torvalds wrote:
>
> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > but with or without this fix __create_xol_area() also needs
> >
> >         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.

Again, this is what Sven did and I agree with this fix for now.

> It was always horribly buggy. The close thing just made it more
> *obviously* buggy, because closing a vma is a lot more common than
> mremap'ing it.

Well, this code is very old, I don't think it was "always" buggy.
But at least today it is horribly ugly, and

> Either use kzalloc(), or do a proper initializer something like this:
>
> -       area->xol_mapping.name = "[uprobes]";
> -       area->xol_mapping.fault = NULL;
> -       area->xol_mapping.pages = area->pages;
> +       area->xol_mapping = (struct vm_special_mapping) {
> +               .name = "[uprobes]",
> +               .pages = area->pages,
> +               .close = uprobe_clear_state,
> +       };

either way the code is still ugly, imo.

How about the (untested) patch below?

I am not going to send this patch right now, it conflicts with the ongoing
close/mremap changes, but what do you think?

We do not need to add the instance of vm_special_mapping into mm_struct,
a single instance (xol_mapping below) with .fault = xol_fault() is enough.
And, with this change xol_area no longer needs "struct page *pages[2]", it
can be turned into "struct page *page".

Oleg.
---

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -101,7 +101,6 @@ struct xol_area {
 	atomic_t 			slot_count;	/* number of in-use slots */
 	unsigned long 			*bitmap;	/* 0 = free slot */
 
-	struct vm_special_mapping	xol_mapping;
 	struct page 			*pages[2];
 	/*
 	 * We keep the vma's vm_start rather than a pointer to the vma
@@ -1453,6 +1452,21 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
 }
 
+static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
+			    struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct xol_area *area = vma->vm_mm->uprobes_state.xol_area;
+
+	vmf->page = area->pages[0];
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_special_mapping xol_mapping = {
+	.name = "[uprobes]",
+	.fault = xol_fault,
+};
+
 /* Slot allocation for XOL */
 static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
@@ -1479,7 +1493,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 
 	vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
 				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
-				&area->xol_mapping);
+				&xol_mapping);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto fail;
@@ -1518,9 +1532,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->bitmap)
 		goto free_area;
 
-	area->xol_mapping.name = "[uprobes]";
-	area->xol_mapping.fault = NULL;
-	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
 		goto free_bitmap;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03 19:12     ` Linus Torvalds
                         ` (2 preceding siblings ...)
  2024-09-04  9:56       ` Oleg Nesterov
@ 2024-09-04 10:03       ` Oleg Nesterov
  3 siblings, 0 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-04 10:03 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Sven Schnelle, Michael Ellerman, Masami Hiramatsu, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, linux-kernel, linux-trace-kernel, linux-perf-users

I didn't have time to write a full reply yesterday.

On 09/03, Linus Torvalds wrote:
>
> On Tue, 3 Sept 2024 at 02:09, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > but with or without this fix __create_xol_area() also needs
> >
> >         area->xol_mapping.mremap = NULL;
>
> I think the whole thing needs to be zeroed out.

Again, this is what Sven did and I agree with this fix for now.

> It was always horribly buggy. The close thing just made it more
> *obviously* buggy, because closing a vma is a lot more common than
> mremap'ing it.

Well, this code is very old, I don't think it was "always" buggy.
But at least today it is horribly ugly, and

> Either use kzalloc(), or do a proper initializer something like this:
>
> -       area->xol_mapping.name = "[uprobes]";
> -       area->xol_mapping.fault = NULL;
> -       area->xol_mapping.pages = area->pages;
> +       area->xol_mapping = (struct vm_special_mapping) {
> +               .name = "[uprobes]",
> +               .pages = area->pages,
> +               .close = uprobe_clear_state,
> +       };

either way the code is still ugly, imo.

How about the (untested) patch below?

I am not going to send this patch right now, it conflicts with the ongoing
xol_mapping.close changes, but what do you think?

We do not need to add the instance of vm_special_mapping into mm_struct,
a single instance (xol_mapping below) with .fault = xol_fault() is enough.
And, with this change xol_area no longer needs "struct page *pages[2]", it
can be turned into "struct page *page".

Oleg.
---

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -101,7 +101,6 @@ struct xol_area {
 	atomic_t 			slot_count;	/* number of in-use slots */
 	unsigned long 			*bitmap;	/* 0 = free slot */
 
-	struct vm_special_mapping	xol_mapping;
 	struct page 			*pages[2];
 	/*
 	 * We keep the vma's vm_start rather than a pointer to the vma
@@ -1453,6 +1452,21 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
 }
 
+static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
+			    struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct xol_area *area = vma->vm_mm->uprobes_state.xol_area;
+
+	vmf->page = area->pages[0];
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_special_mapping xol_mapping = {
+	.name = "[uprobes]",
+	.fault = xol_fault,
+};
+
 /* Slot allocation for XOL */
 static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
@@ -1479,7 +1493,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 
 	vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
 				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
-				&area->xol_mapping);
+				&xol_mapping);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto fail;
@@ -1518,9 +1532,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->bitmap)
 		goto free_area;
 
-	area->xol_mapping.name = "[uprobes]";
-	area->xol_mapping.fault = NULL;
-	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
 		goto free_bitmap;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-04  3:57     ` Michael Ellerman
@ 2024-09-04 21:26       ` Andrew Morton
  0 siblings, 0 replies; 18+ messages in thread
From: Andrew Morton @ 2024-09-04 21:26 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Sven Schnelle, Masami Hiramatsu, Oleg Nesterov, Peter Zijlstra,
	Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

On Wed, 04 Sep 2024 13:57:13 +1000 Michael Ellerman <mpe@ellerman.id.au> wrote:

> Sven Schnelle <svens@linux.ibm.com> writes:
> > Hi Michael,
> 
> Hi Sven,
> 
> > Sven Schnelle <svens@linux.ibm.com> writes:
> >
> >> The following KASAN splat was shown:
> >>
> >> [   44.505448] ==================================================================                                                                      20:37:27 [3421/145075]
> >> [   44.505455] BUG: KASAN: slab-use-after-free in special_mapping_close+0x9c/0xc8
> >> [   44.505471] Read of size 8 at addr 00000000868dac48 by task sh/1384
> ...
> >> [..]
> >
> > As this has a dependency on your special mapping close series, do you
> > want to carry that with your patches?
> 
> Andrew has my series in mm-stable, so I think this should go into mm as
> well. I assume he will pick it up.

yup, thanks.  Added, with

Fixes: 223febc6e557 ("mm: add optional close() to struct vm_special_mapping")

It appears that peterz is scooping up Sven's "uprobes: use kzalloc to
allocate xol area".


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-03  7:36 ` [PATCH] uprobes: use vm_special_mapping close() functionality Sven Schnelle
  2024-09-03  7:49   ` Sven Schnelle
  2024-09-03  9:08   ` Oleg Nesterov
@ 2024-09-11  9:44   ` Oleg Nesterov
  2024-09-11  9:57     ` Oleg Nesterov
  2024-09-11 13:13   ` [PATCH -mm 1/3] Revert "uprobes: use vm_special_mapping close() functionality" Oleg Nesterov
  3 siblings, 1 reply; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-11  9:44 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

On 09/03, Sven Schnelle wrote:
>
> +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> +{
> +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> +
> +	mutex_lock(&delayed_uprobe_lock);
> +	delayed_uprobe_remove(NULL, vma->vm_mm);
> +	mutex_unlock(&delayed_uprobe_lock);
> +
> +	if (!area)
> +		return;
> +
> +	put_page(area->pages[0]);
> +	kfree(area->bitmap);
> +	kfree(area);
> +}
> +
>  static struct xol_area *__create_xol_area(unsigned long vaddr)
>  {
>  	struct mm_struct *mm = current->mm;
> @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
>
>  	area->xol_mapping.name = "[uprobes]";
>  	area->xol_mapping.fault = NULL;
> +	area->xol_mapping.close = uprobe_clear_state;

Ah, no, we can't do this :/

A malicious application can munmap() its "[uprobes]" vma and free
area/pages/bitmap. If this application hits the uprobe breakpoint after
that it will use the freed memory.

And no, "mm->uprobes_state.xol_area = NULL" in uprobe_clear_state() won't
help. Say, another thread can sleep on area.wq when munmap() is called.

Sorry, I should have realized that immediately, but I didn't :/

Andrew, this is uprobes-use-vm_special_mapping-close-functionality.patch
in mm-stable

Oleg.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-11  9:44   ` Oleg Nesterov
@ 2024-09-11  9:57     ` Oleg Nesterov
  2024-09-11 10:12       ` Oleg Nesterov
  0 siblings, 1 reply; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-11  9:57 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

I guess VM_SEALED could help, but it depends on CONFIG_64BIT


On 09/11, Oleg Nesterov wrote:
>
> On 09/03, Sven Schnelle wrote:
> >
> > +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > +{
> > +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> > +
> > +	mutex_lock(&delayed_uprobe_lock);
> > +	delayed_uprobe_remove(NULL, vma->vm_mm);
> > +	mutex_unlock(&delayed_uprobe_lock);
> > +
> > +	if (!area)
> > +		return;
> > +
> > +	put_page(area->pages[0]);
> > +	kfree(area->bitmap);
> > +	kfree(area);
> > +}
> > +
> >  static struct xol_area *__create_xol_area(unsigned long vaddr)
> >  {
> >  	struct mm_struct *mm = current->mm;
> > @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
> >
> >  	area->xol_mapping.name = "[uprobes]";
> >  	area->xol_mapping.fault = NULL;
> > +	area->xol_mapping.close = uprobe_clear_state;
> 
> Ah, no, we can't do this :/
> 
> A malicious application can munmap() its "[uprobes]" vma and free
> area/pages/bitmap. If this application hits the uprobe breakpoint after
> that it will use the freed memory.
> 
> And no, "mm->uprobes_state.xol_area = NULL" in uprobe_clear_state() won't
> help. Say, another thread can sleep on area.wq when munmap() is called.
> 
> Sorry, I should have realized that immediately, but I didn't :/
> 
> Andrew, this is uprobes-use-vm_special_mapping-close-functionality.patch
> in mm-stable
> 
> Oleg.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] uprobes: use vm_special_mapping close() functionality
  2024-09-11  9:57     ` Oleg Nesterov
@ 2024-09-11 10:12       ` Oleg Nesterov
  0 siblings, 0 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-11 10:12 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, linux-kernel, linux-trace-kernel,
	linux-perf-users

Damn, sorry for the spam.

So I am going to send the patch from
https://lore.kernel.org/all/20240904095449.GA28781@redhat.com/

To me it looks like a good cleanup regardless, and unless I am totally
confused it should fix the problem with use-after-free.

Oleg.

On 09/11, Oleg Nesterov wrote:
>
> I guess VM_SEALED could help, but it depends on CONFIG_64BIT
>
>
> On 09/11, Oleg Nesterov wrote:
> >
> > On 09/03, Sven Schnelle wrote:
> > >
> > > +static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > > +{
> > > +	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
> > > +
> > > +	mutex_lock(&delayed_uprobe_lock);
> > > +	delayed_uprobe_remove(NULL, vma->vm_mm);
> > > +	mutex_unlock(&delayed_uprobe_lock);
> > > +
> > > +	if (!area)
> > > +		return;
> > > +
> > > +	put_page(area->pages[0]);
> > > +	kfree(area->bitmap);
> > > +	kfree(area);
> > > +}
> > > +
> > >  static struct xol_area *__create_xol_area(unsigned long vaddr)
> > >  {
> > >  	struct mm_struct *mm = current->mm;
> > > @@ -1481,6 +1500,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
> > >
> > >  	area->xol_mapping.name = "[uprobes]";
> > >  	area->xol_mapping.fault = NULL;
> > > +	area->xol_mapping.close = uprobe_clear_state;
> >
> > Ah, no, we can't do this :/
> >
> > A malicious application can munmap() its "[uprobes]" vma and free
> > area/pages/bitmap. If this application hits the uprobe breakpoint after
> > that it will use the freed memory.
> >
> > And no, "mm->uprobes_state.xol_area = NULL" in uprobe_clear_state() won't
> > help. Say, another thread can sleep on area.wq when munmap() is called.
> >
> > Sorry, I should have realized that immediately, but I didn't :/
> >
> > Andrew, this is uprobes-use-vm_special_mapping-close-functionality.patch
> > in mm-stable
> >
> > Oleg.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH -mm 1/3] Revert "uprobes: use vm_special_mapping close() functionality"
  2024-09-03  7:36 ` [PATCH] uprobes: use vm_special_mapping close() functionality Sven Schnelle
                     ` (2 preceding siblings ...)
  2024-09-11  9:44   ` Oleg Nesterov
@ 2024-09-11 13:13   ` Oleg Nesterov
  2024-09-11 13:14     ` [PATCH -mm 2/3] uprobes: introduce the global struct vm_special_mapping xol_mapping Oleg Nesterov
  2024-09-11 13:14     ` [PATCH -mm 3/3] uprobes: turn xol_area->pages[2] into xol_area->page Oleg Nesterov
  3 siblings, 2 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-11 13:13 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, Andrii Nakryiko, linux-kernel,
	linux-trace-kernel, linux-perf-users

This reverts commit 08e28de1160a712724268fd33d77b32f1bc84d1c.

A malicious application can munmap() its "[uprobes]" vma and in this case
xol_mapping.close == uprobe_clear_state() will free the memory which can
be used by another thread, or the same thread when it hits the uprobe bp
afterwards.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h |  1 +
 kernel/events/uprobes.c | 36 +++++++++++++++++++-----------------
 kernel/fork.c           |  1 +
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 493dc95d912c..b503fafb7fb3 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -126,6 +126,7 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void uprobe_clear_state(struct mm_struct *mm);
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
 extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c2d6b2d64de2..73cc47708679 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1482,22 +1482,6 @@ void * __weak arch_uprobe_trampoline(unsigned long *psize)
 	return &insn;
 }
 
-/*
- * uprobe_clear_state - Free the area allocated for slots.
- */
-static void uprobe_clear_state(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
-{
-	struct xol_area *area = container_of(vma->vm_private_data, struct xol_area, xol_mapping);
-
-	mutex_lock(&delayed_uprobe_lock);
-	delayed_uprobe_remove(NULL, vma->vm_mm);
-	mutex_unlock(&delayed_uprobe_lock);
-
-	put_page(area->pages[0]);
-	kfree(area->bitmap);
-	kfree(area);
-}
-
 static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
@@ -1516,7 +1500,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 
 	area->xol_mapping.name = "[uprobes]";
 	area->xol_mapping.fault = NULL;
-	area->xol_mapping.close = uprobe_clear_state;
 	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
@@ -1562,6 +1545,25 @@ static struct xol_area *get_xol_area(void)
 	return area;
 }
 
+/*
+ * uprobe_clear_state - Free the area allocated for slots.
+ */
+void uprobe_clear_state(struct mm_struct *mm)
+{
+	struct xol_area *area = mm->uprobes_state.xol_area;
+
+	mutex_lock(&delayed_uprobe_lock);
+	delayed_uprobe_remove(NULL, mm);
+	mutex_unlock(&delayed_uprobe_lock);
+
+	if (!area)
+		return;
+
+	put_page(area->pages[0]);
+	kfree(area->bitmap);
+	kfree(area);
+}
+
 void uprobe_start_dup_mmap(void)
 {
 	percpu_down_read(&dup_mmap_sem);
diff --git a/kernel/fork.c b/kernel/fork.c
index 61070248a7d3..3d590e51ce84 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1338,6 +1338,7 @@ static inline void __mmput(struct mm_struct *mm)
 {
 	VM_BUG_ON(atomic_read(&mm->mm_users));
 
+	uprobe_clear_state(mm);
 	exit_aio(mm);
 	ksm_exit(mm);
 	khugepaged_exit(mm); /* must run before exit_mmap */
-- 
2.25.1.362.g51ebf55



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH -mm 2/3] uprobes: introduce the global struct vm_special_mapping xol_mapping
  2024-09-11 13:13   ` [PATCH -mm 1/3] Revert "uprobes: use vm_special_mapping close() functionality" Oleg Nesterov
@ 2024-09-11 13:14     ` Oleg Nesterov
  2024-09-11 13:14     ` [PATCH -mm 3/3] uprobes: turn xol_area->pages[2] into xol_area->page Oleg Nesterov
  1 sibling, 0 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-11 13:14 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, Andrii Nakryiko, linux-kernel,
	linux-trace-kernel, linux-perf-users

Currently each xol_area has its own instance of vm_special_mapping, this
is suboptimal and ugly. Kill xol_area->xol_mapping and add a single global
instance of vm_special_mapping, the ->fault() method can use area->pages
rather than xol_mapping->pages.

As a side effect this fixes the problem introduced by the recent commit
223febc6e557 ("mm: add optional close() to struct vm_special_mapping"), if
special_mapping_close() is called from the __mmput() paths, it will use
vma->vm_private_data = &area->xol_mapping freed by uprobe_clear_state().

Reported-by: Sven Schnelle <svens@linux.ibm.com>
Closes: https://lore.kernel.org/all/yt9dy149vprr.fsf@linux.ibm.com/
Fixes: 223febc6e557 ("mm: add optional close() to struct vm_special_mapping")
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/events/uprobes.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 73cc47708679..a478e028043f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -99,7 +99,6 @@ struct xol_area {
 	atomic_t 			slot_count;	/* number of in-use slots */
 	unsigned long 			*bitmap;	/* 0 = free slot */
 
-	struct vm_special_mapping	xol_mapping;
 	struct page 			*pages[2];
 	/*
 	 * We keep the vma's vm_start rather than a pointer to the vma
@@ -1433,6 +1432,21 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
 }
 
+static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
+			    struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct xol_area *area = vma->vm_mm->uprobes_state.xol_area;
+
+	vmf->page = area->pages[0];
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_special_mapping xol_mapping = {
+	.name = "[uprobes]",
+	.fault = xol_fault,
+};
+
 /* Slot allocation for XOL */
 static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
@@ -1459,7 +1473,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 
 	vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
 				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
-				&area->xol_mapping);
+				&xol_mapping);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto fail;
@@ -1498,9 +1512,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->bitmap)
 		goto free_area;
 
-	area->xol_mapping.name = "[uprobes]";
-	area->xol_mapping.fault = NULL;
-	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])
 		goto free_bitmap;
-- 
2.25.1.362.g51ebf55



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH -mm 3/3] uprobes: turn xol_area->pages[2] into xol_area->page
  2024-09-11 13:13   ` [PATCH -mm 1/3] Revert "uprobes: use vm_special_mapping close() functionality" Oleg Nesterov
  2024-09-11 13:14     ` [PATCH -mm 2/3] uprobes: introduce the global struct vm_special_mapping xol_mapping Oleg Nesterov
@ 2024-09-11 13:14     ` Oleg Nesterov
  1 sibling, 0 replies; 18+ messages in thread
From: Oleg Nesterov @ 2024-09-11 13:14 UTC (permalink / raw)
  To: Sven Schnelle, Andrew Morton
  Cc: Michael Ellerman, Masami Hiramatsu, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Ian Rogers, Adrian Hunter,
	Liang, Kan, Linus Torvalds, Andrii Nakryiko, linux-kernel,
	linux-trace-kernel, linux-perf-users

Now that xol_mapping has its own ->fault() method we no longer need
xol_area->pages[1] == NULL, we need a single page.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/events/uprobes.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a478e028043f..94a17435cfe6 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -99,7 +99,7 @@ struct xol_area {
 	atomic_t 			slot_count;	/* number of in-use slots */
 	unsigned long 			*bitmap;	/* 0 = free slot */
 
-	struct page 			*pages[2];
+	struct page			*page;
 	/*
 	 * We keep the vma's vm_start rather than a pointer to the vma
 	 * itself.  The probed process or a naughty kernel module could make
@@ -1437,7 +1437,7 @@ static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
 {
 	struct xol_area *area = vma->vm_mm->uprobes_state.xol_area;
 
-	vmf->page = area->pages[0];
+	vmf->page = area->page;
 	get_page(vmf->page);
 	return 0;
 }
@@ -1512,10 +1512,9 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	if (!area->bitmap)
 		goto free_area;
 
-	area->pages[0] = alloc_page(GFP_HIGHUSER);
-	if (!area->pages[0])
+	area->page = alloc_page(GFP_HIGHUSER);
+	if (!area->page)
 		goto free_bitmap;
-	area->pages[1] = NULL;
 
 	area->vaddr = vaddr;
 	init_waitqueue_head(&area->wq);
@@ -1523,12 +1522,12 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
 	set_bit(0, area->bitmap);
 	atomic_set(&area->slot_count, 1);
 	insns = arch_uprobe_trampoline(&insns_size);
-	arch_uprobe_copy_ixol(area->pages[0], 0, insns, insns_size);
+	arch_uprobe_copy_ixol(area->page, 0, insns, insns_size);
 
 	if (!xol_add_vma(mm, area))
 		return area;
 
-	__free_page(area->pages[0]);
+	__free_page(area->page);
  free_bitmap:
 	kfree(area->bitmap);
  free_area:
@@ -1570,7 +1569,7 @@ void uprobe_clear_state(struct mm_struct *mm)
 	if (!area)
 		return;
 
-	put_page(area->pages[0]);
+	put_page(area->page);
 	kfree(area->bitmap);
 	kfree(area);
 }
@@ -1637,7 +1636,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 	if (unlikely(!xol_vaddr))
 		return 0;
 
-	arch_uprobe_copy_ixol(area->pages[0], xol_vaddr,
+	arch_uprobe_copy_ixol(area->page, xol_vaddr,
 			      &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 
 	return xol_vaddr;
-- 
2.25.1.362.g51ebf55



^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2024-09-11 13:15 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <CAHk-=wjD0XLhkzou89J-TK=L6B88pFoNYxN1uTWRQB3U5Czywg@mail.gmail.com>
2024-09-03  7:36 ` [PATCH] uprobes: use vm_special_mapping close() functionality Sven Schnelle
2024-09-03  7:49   ` Sven Schnelle
2024-09-04  3:57     ` Michael Ellerman
2024-09-04 21:26       ` Andrew Morton
2024-09-03  9:08   ` Oleg Nesterov
2024-09-03  9:32     ` Sven Schnelle
2024-09-03 19:12     ` Linus Torvalds
2024-09-03 19:31       ` Sven Schnelle
2024-09-03 19:34         ` Linus Torvalds
2024-09-03 19:32       ` Oleg Nesterov
2024-09-04  9:56       ` Oleg Nesterov
2024-09-04 10:03       ` Oleg Nesterov
2024-09-11  9:44   ` Oleg Nesterov
2024-09-11  9:57     ` Oleg Nesterov
2024-09-11 10:12       ` Oleg Nesterov
2024-09-11 13:13   ` [PATCH -mm 1/3] Revert "uprobes: use vm_special_mapping close() functionality" Oleg Nesterov
2024-09-11 13:14     ` [PATCH -mm 2/3] uprobes: introduce the global struct vm_special_mapping xol_mapping Oleg Nesterov
2024-09-11 13:14     ` [PATCH -mm 3/3] uprobes: turn xol_area->pages[2] into xol_area->page Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).