* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
@ 2024-04-30 9:31 ` Edward Adam Davis
2024-04-30 11:09 ` syzbot
2024-05-01 20:33 ` Michael S. Tsirkin
2024-04-30 11:02 ` Hillf Danton
` (6 subsequent siblings)
7 siblings, 2 replies; 27+ messages in thread
From: Edward Adam Davis @ 2024-04-30 9:31 UTC (permalink / raw)
To: syzbot+98edc2df894917b3431f; +Cc: linux-kernel, syzkaller-bugs
please test uaf in vhost_task_fn
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index 48c289947b99..8800f5acc007 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -61,8 +61,8 @@ static int vhost_task_fn(void *data)
set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags);
vtsk->handle_sigkill(vtsk->data);
}
- complete(&vtsk->exited);
mutex_unlock(&vtsk->exit_mutex);
+ complete(&vtsk->exited);
do_exit(0);
}
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 9:31 ` Edward Adam Davis
@ 2024-04-30 11:09 ` syzbot
2024-05-01 20:33 ` Michael S. Tsirkin
1 sibling, 0 replies; 27+ messages in thread
From: syzbot @ 2024-04-30 11:09 UTC (permalink / raw)
To: eadavis, linux-kernel, syzkaller-bugs
Hello,
syzbot has tested the proposed patch but the reproducer is still triggering an issue:
WARNING: suspicious RCU usage in __do_softirq
=============================
WARNING: suspicious RCU usage
6.9.0-rc5-next-20240426-syzkaller-dirty #0 Not tainted
-----------------------------
kernel/rcu/tree.c:276 Illegal rcu_softirq_qs() in RCU read-side critical section!
other info that might help us debug this:
rcu_scheduler_active = 2, debug_locks = 1
1 lock held by ksoftirqd/0/16:
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: rcu_read_lock_sched include/linux/rcupdate.h:933 [inline]
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: pfn_valid include/linux/mmzone.h:2022 [inline]
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: __virt_addr_valid+0x183/0x520 arch/x86/mm/physaddr.c:65
stack backtrace:
CPU: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.9.0-rc5-next-20240426-syzkaller-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
lockdep_rcu_suspicious+0x221/0x340 kernel/locking/lockdep.c:6712
rcu_softirq_qs+0xd9/0x370 kernel/rcu/tree.c:273
__do_softirq+0x5fd/0x980 kernel/softirq.c:568
invoke_softirq kernel/softirq.c:428 [inline]
__irq_exit_rcu+0xf2/0x1c0 kernel/softirq.c:633
irq_exit_rcu+0x9/0x30 kernel/softirq.c:645
instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline]
sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702
RIP: 0010:check_preemption_disabled+0x2/0x120 lib/smp_processor_id.c:13
Code: c4 1f 8c 48 c7 c6 c0 c4 1f 8c eb 1c 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 41 57 <41> 56 41 54 53 48 83 ec 10 65 48 8b 04 25 28 00 00 00 48 89 44 24
RSP: 0018:ffffc900001578f0 EFLAGS: 00000287
RAX: 0000000000000001 RBX: 0000000000000000 RCX: ffffffff8172cd10
RDX: 0000000000000000 RSI: ffffffff8c1fc4c0 RDI: ffffffff8c1fc480
RBP: ffffc90000157a48 R08: ffffffff8fac7fef R09: 1ffffffff1f58ffd
R10: dffffc0000000000 R11: fffffbfff1f58ffe R12: 1ffff9200002af30
R13: ffffffff81423f93 R14: ffffffff81423f93 R15: dffffc0000000000
rcu_dynticks_curr_cpu_in_eqs include/linux/context_tracking.h:122 [inline]
rcu_is_watching+0x15/0xb0 kernel/rcu/tree.c:725
trace_lock_release include/trace/events/lock.h:69 [inline]
lock_release+0xbf/0x9f0 kernel/locking/lockdep.c:5765
rcu_lock_release include/linux/rcupdate.h:339 [inline]
rcu_read_unlock_sched include/linux/rcupdate.h:954 [inline]
pfn_valid include/linux/mmzone.h:2032 [inline]
__virt_addr_valid+0x41e/0x520 arch/x86/mm/physaddr.c:65
kasan_addr_to_slab+0xd/0x80 mm/kasan/common.c:37
__kasan_record_aux_stack+0x11/0xc0 mm/kasan/generic.c:526
__call_rcu_common kernel/rcu/tree.c:3103 [inline]
call_rcu+0x167/0xa70 kernel/rcu/tree.c:3207
context_switch kernel/sched/core.c:5411 [inline]
__schedule+0x17f0/0x4a50 kernel/sched/core.c:6745
__schedule_loop kernel/sched/core.c:6822 [inline]
schedule+0x14b/0x320 kernel/sched/core.c:6837
smpboot_thread_fn+0x61e/0xa30 kernel/smpboot.c:160
kthread+0x2f0/0x390 kernel/kthread.c:389
ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
</TASK>
----------------
Code disassembly (best guess), 3 bytes skipped:
0: 48 c7 c6 c0 c4 1f 8c mov $0xffffffff8c1fc4c0,%rsi
7: eb 1c jmp 0x25
9: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1)
10: 00 00 00
13: 66 90 xchg %ax,%ax
15: 90 nop
16: 90 nop
17: 90 nop
18: 90 nop
19: 90 nop
1a: 90 nop
1b: 90 nop
1c: 90 nop
1d: 90 nop
1e: 90 nop
1f: 90 nop
20: 90 nop
21: 90 nop
22: 90 nop
23: 90 nop
24: 90 nop
25: 41 57 push %r15
* 27: 41 56 push %r14 <-- trapping instruction
29: 41 54 push %r12
2b: 53 push %rbx
2c: 48 83 ec 10 sub $0x10,%rsp
30: 65 48 8b 04 25 28 00 mov %gs:0x28,%rax
37: 00 00
39: 48 rex.W
3a: 89 .byte 0x89
3b: 44 rex.R
3c: 24 .byte 0x24
Tested on:
commit: bb7a2467 Add linux-next specific files for 20240426
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
console output: https://syzkaller.appspot.com/x/log.txt?x=12a94f0f180000
kernel config: https://syzkaller.appspot.com/x/.config?x=5c6a0288262dd108
dashboard link: https://syzkaller.appspot.com/bug?extid=98edc2df894917b3431f
compiler: Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=16df8838980000
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 9:31 ` Edward Adam Davis
2024-04-30 11:09 ` syzbot
@ 2024-05-01 20:33 ` Michael S. Tsirkin
1 sibling, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-05-01 20:33 UTC (permalink / raw)
To: Edward Adam Davis
Cc: syzbot+98edc2df894917b3431f, linux-kernel, syzkaller-bugs
On Tue, Apr 30, 2024 at 05:31:47PM +0800, Edward Adam Davis wrote:
> please test uaf in vhost_task_fn
>
> #syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
> diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
> index 48c289947b99..8800f5acc007 100644
> --- a/kernel/vhost_task.c
> +++ b/kernel/vhost_task.c
> @@ -61,8 +61,8 @@ static int vhost_task_fn(void *data)
> set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags);
> vtsk->handle_sigkill(vtsk->data);
> }
> - complete(&vtsk->exited);
> mutex_unlock(&vtsk->exit_mutex);
> + complete(&vtsk->exited);
>
> do_exit(0);
> }
OK, so if rebased on the latest master by Linus, then this patch is
sufficient. I squashed it in. Edward, thanks a lot for working
on this! I added a Suggested-by tag; if you would like me to add your
S.O.B. too, I can do that.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
2024-04-30 9:31 ` Edward Adam Davis
@ 2024-04-30 11:02 ` Hillf Danton
2024-04-30 15:47 ` syzbot
2024-04-30 11:57 ` Edward Adam Davis
` (5 subsequent siblings)
7 siblings, 1 reply; 27+ messages in thread
From: Hillf Danton @ 2024-04-30 11:02 UTC (permalink / raw)
To: syzbot; +Cc: linux-kernel, syzkaller-bugs
On Tue, 30 Apr 2024 01:25:26 -0700
> syzbot found the following issue on:
>
> HEAD commit: bb7a2467e6be Add linux-next specific files for 20240426
> git tree: linux-next
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=16c30028980000
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
--- x/kernel/vhost_task.c
+++ y/kernel/vhost_task.c
@@ -100,6 +100,8 @@ void vhost_task_stop(struct vhost_task *
* freeing it below.
*/
wait_for_completion(&vtsk->exited);
+ mutex_lock(&vtsk->exit_mutex);
+ mutex_unlock(&vtsk->exit_mutex);
kfree(vtsk);
}
EXPORT_SYMBOL_GPL(vhost_task_stop);
--
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 11:02 ` Hillf Danton
@ 2024-04-30 15:47 ` syzbot
0 siblings, 0 replies; 27+ messages in thread
From: syzbot @ 2024-04-30 15:47 UTC (permalink / raw)
To: hdanton, linux-kernel, syzkaller-bugs
Hello,
syzbot has tested the proposed patch but the reproducer is still triggering an issue:
WARNING: suspicious RCU usage in __do_softirq
=============================
WARNING: suspicious RCU usage
6.9.0-rc5-next-20240426-syzkaller-dirty #0 Not tainted
-----------------------------
kernel/rcu/tree.c:276 Illegal rcu_softirq_qs() in RCU read-side critical section!
other info that might help us debug this:
rcu_scheduler_active = 2, debug_locks = 1
1 lock held by ksoftirqd/0/16:
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: rcu_read_lock_sched include/linux/rcupdate.h:933 [inline]
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: pfn_valid include/linux/mmzone.h:2022 [inline]
#0: ffffffff8e333b20 (rcu_read_lock_sched){....}-{1:2}, at: __virt_addr_valid+0x183/0x520 arch/x86/mm/physaddr.c:65
stack backtrace:
CPU: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.9.0-rc5-next-20240426-syzkaller-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
lockdep_rcu_suspicious+0x221/0x340 kernel/locking/lockdep.c:6712
rcu_softirq_qs+0xd9/0x370 kernel/rcu/tree.c:273
__do_softirq+0x5fd/0x980 kernel/softirq.c:568
invoke_softirq kernel/softirq.c:428 [inline]
__irq_exit_rcu+0xf2/0x1c0 kernel/softirq.c:633
irq_exit_rcu+0x9/0x30 kernel/softirq.c:645
instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline]
sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043
</IRQ>
<TASK>
asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702
RIP: 0010:lock_acquire+0x264/0x550 kernel/locking/lockdep.c:5758
Code: 2b 00 74 08 4c 89 f7 e8 aa 61 89 00 f6 44 24 61 02 0f 85 85 01 00 00 41 f7 c7 00 02 00 00 74 01 fb 48 c7 44 24 40 0e 36 e0 45 <4b> c7 44 25 00 00 00 00 00 43 c7 44 25 09 00 00 00 00 43 c7 44 25
RSP: 0018:ffffc900001578e0 EFLAGS: 00000206
RAX: 0000000000000001 RBX: 1ffff9200002af28 RCX: 0000000000000001
RDX: dffffc0000000000 RSI: ffffffff8bcac4c0 RDI: ffffffff8c1fc4e0
RBP: ffffc90000157a40 R08: ffffffff92f96587 R09: 1ffffffff25f2cb0
R10: dffffc0000000000 R11: fffffbfff25f2cb1 R12: 1ffff9200002af24
R13: dffffc0000000000 R14: ffffc90000157940 R15: 0000000000000246
rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
rcu_read_lock_sched include/linux/rcupdate.h:933 [inline]
pfn_valid include/linux/mmzone.h:2022 [inline]
__virt_addr_valid+0x1a0/0x520 arch/x86/mm/physaddr.c:65
kasan_addr_to_slab+0xd/0x80 mm/kasan/common.c:37
__kasan_record_aux_stack+0x11/0xc0 mm/kasan/generic.c:526
__call_rcu_common kernel/rcu/tree.c:3103 [inline]
call_rcu+0x167/0xa70 kernel/rcu/tree.c:3207
context_switch kernel/sched/core.c:5411 [inline]
__schedule+0x17f0/0x4a50 kernel/sched/core.c:6745
__schedule_loop kernel/sched/core.c:6822 [inline]
schedule+0x14b/0x320 kernel/sched/core.c:6837
smpboot_thread_fn+0x61e/0xa30 kernel/smpboot.c:160
kthread+0x2f0/0x390 kernel/kthread.c:389
ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
</TASK>
----------------
Code disassembly (best guess):
0: 2b 00 sub (%rax),%eax
2: 74 08 je 0xc
4: 4c 89 f7 mov %r14,%rdi
7: e8 aa 61 89 00 call 0x8961b6
c: f6 44 24 61 02 testb $0x2,0x61(%rsp)
11: 0f 85 85 01 00 00 jne 0x19c
17: 41 f7 c7 00 02 00 00 test $0x200,%r15d
1e: 74 01 je 0x21
20: fb sti
21: 48 c7 44 24 40 0e 36 movq $0x45e0360e,0x40(%rsp)
28: e0 45
* 2a: 4b c7 44 25 00 00 00 movq $0x0,0x0(%r13,%r12,1) <-- trapping instruction
31: 00 00
33: 43 c7 44 25 09 00 00 movl $0x0,0x9(%r13,%r12,1)
3a: 00 00
3c: 43 rex.XB
3d: c7 .byte 0xc7
3e: 44 rex.R
3f: 25 .byte 0x25
Tested on:
commit: bb7a2467 Add linux-next specific files for 20240426
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
console output: https://syzkaller.appspot.com/x/log.txt?x=11b1a228980000
kernel config: https://syzkaller.appspot.com/x/.config?x=5c6a0288262dd108
dashboard link: https://syzkaller.appspot.com/bug?extid=98edc2df894917b3431f
compiler: Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40
patch: https://syzkaller.appspot.com/x/patch.diff?x=16d4837f180000
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
2024-04-30 9:31 ` Edward Adam Davis
2024-04-30 11:02 ` Hillf Danton
@ 2024-04-30 11:57 ` Edward Adam Davis
2024-04-30 15:34 ` syzbot
2024-04-30 13:05 ` [PATCH next] vhost_task: after freeing vhost_task it should not be accessed " Edward Adam Davis
` (4 subsequent siblings)
7 siblings, 1 reply; 27+ messages in thread
From: Edward Adam Davis @ 2024-04-30 11:57 UTC (permalink / raw)
To: syzbot+98edc2df894917b3431f; +Cc: linux-kernel, syzkaller-bugs
please test uaf in vhost_task_fn
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index 48c289947b99..e87d60dde323 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -21,9 +21,10 @@ struct vhost_task {
unsigned long flags;
struct task_struct *task;
/* serialize SIGKILL and vhost_task_stop calls */
- struct mutex exit_mutex;
};
+static DEFINE_MUTEX(exit_mutex);
+
static int vhost_task_fn(void *data)
{
struct vhost_task *vtsk = data;
@@ -51,7 +52,7 @@ static int vhost_task_fn(void *data)
schedule();
}
- mutex_lock(&vtsk->exit_mutex);
+ mutex_lock(&exit_mutex);
/*
* If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
* When the vhost layer has called vhost_task_stop it's already stopped
@@ -62,7 +63,7 @@ static int vhost_task_fn(void *data)
vtsk->handle_sigkill(vtsk->data);
}
complete(&vtsk->exited);
- mutex_unlock(&vtsk->exit_mutex);
+ mutex_unlock(&exit_mutex);
do_exit(0);
}
@@ -88,12 +89,12 @@ EXPORT_SYMBOL_GPL(vhost_task_wake);
*/
void vhost_task_stop(struct vhost_task *vtsk)
{
- mutex_lock(&vtsk->exit_mutex);
+ mutex_lock(&exit_mutex);
if (!test_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags)) {
set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
vhost_task_wake(vtsk);
}
- mutex_unlock(&vtsk->exit_mutex);
+ mutex_unlock(&exit_mutex);
/*
* Make sure vhost_task_fn is no longer accessing the vhost_task before
@@ -135,7 +136,6 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
if (!vtsk)
return NULL;
init_completion(&vtsk->exited);
- mutex_init(&vtsk->exit_mutex);
vtsk->data = arg;
vtsk->fn = fn;
vtsk->handle_sigkill = handle_sigkill;
^ permalink raw reply related [flat|nested] 27+ messages in thread* [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
` (2 preceding siblings ...)
2024-04-30 11:57 ` Edward Adam Davis
@ 2024-04-30 13:05 ` Edward Adam Davis
2024-04-30 16:23 ` Mike Christie
2024-04-30 22:50 ` [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read " Hillf Danton
` (3 subsequent siblings)
7 siblings, 1 reply; 27+ messages in thread
From: Edward Adam Davis @ 2024-04-30 13:05 UTC (permalink / raw)
To: syzbot+98edc2df894917b3431f
Cc: jasowang, kvm, linux-kernel, michael.christie, mst, netdev,
syzkaller-bugs, virtualization
[syzbot reported]
BUG: KASAN: slab-use-after-free in instrument_atomic_read include/linux/instrumented.h:68 [inline]
BUG: KASAN: slab-use-after-free in atomic_long_read include/linux/atomic/atomic-instrumented.h:3188 [inline]
BUG: KASAN: slab-use-after-free in __mutex_unlock_slowpath+0xef/0x750 kernel/locking/mutex.c:921
Read of size 8 at addr ffff888023632880 by task vhost-5104/5105
CPU: 0 PID: 5105 Comm: vhost-5104 Not tainted 6.9.0-rc5-next-20240426-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
print_address_description mm/kasan/report.c:377 [inline]
print_report+0x169/0x550 mm/kasan/report.c:488
kasan_report+0x143/0x180 mm/kasan/report.c:601
kasan_check_range+0x282/0x290 mm/kasan/generic.c:189
instrument_atomic_read include/linux/instrumented.h:68 [inline]
atomic_long_read include/linux/atomic/atomic-instrumented.h:3188 [inline]
__mutex_unlock_slowpath+0xef/0x750 kernel/locking/mutex.c:921
vhost_task_fn+0x3bc/0x3f0 kernel/vhost_task.c:65
ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
</TASK>
Allocated by task 5104:
kasan_save_stack mm/kasan/common.c:47 [inline]
kasan_save_track+0x3f/0x80 mm/kasan/common.c:68
poison_kmalloc_redzone mm/kasan/common.c:370 [inline]
__kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387
kasan_kmalloc include/linux/kasan.h:211 [inline]
kmalloc_trace_noprof+0x19c/0x2b0 mm/slub.c:4146
kmalloc_noprof include/linux/slab.h:660 [inline]
kzalloc_noprof include/linux/slab.h:778 [inline]
vhost_task_create+0x149/0x300 kernel/vhost_task.c:134
vhost_worker_create+0x17b/0x3f0 drivers/vhost/vhost.c:667
vhost_dev_set_owner+0x563/0x940 drivers/vhost/vhost.c:945
vhost_dev_ioctl+0xda/0xda0 drivers/vhost/vhost.c:2108
vhost_vsock_dev_ioctl+0x2bb/0xfa0 drivers/vhost/vsock.c:875
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:907 [inline]
__se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Freed by task 5104:
kasan_save_stack mm/kasan/common.c:47 [inline]
kasan_save_track+0x3f/0x80 mm/kasan/common.c:68
kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579
poison_slab_object+0xe0/0x150 mm/kasan/common.c:240
__kasan_slab_free+0x37/0x60 mm/kasan/common.c:256
kasan_slab_free include/linux/kasan.h:184 [inline]
slab_free_hook mm/slub.c:2190 [inline]
slab_free mm/slub.c:4430 [inline]
kfree+0x149/0x350 mm/slub.c:4551
vhost_worker_destroy drivers/vhost/vhost.c:629 [inline]
vhost_workers_free drivers/vhost/vhost.c:648 [inline]
vhost_dev_cleanup+0x9b0/0xba0 drivers/vhost/vhost.c:1051
vhost_vsock_dev_release+0x3aa/0x410 drivers/vhost/vsock.c:751
__fput+0x406/0x8b0 fs/file_table.c:422
__do_sys_close fs/open.c:1555 [inline]
__se_sys_close fs/open.c:1540 [inline]
__x64_sys_close+0x7f/0x110 fs/open.c:1540
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
[Fix]
Delete the member exit_mutex from the struct vhost_task and replace it with a
declared global static mutex.
Fixes: a3df30984f4f ("vhost_task: Handle SIGKILL by flushing work and exiting")
Reported-by: syzbot+98edc2df894917b3431f@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
---
kernel/vhost_task.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index 48c289947b99..375356499867 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -20,10 +20,10 @@ struct vhost_task {
struct completion exited;
unsigned long flags;
struct task_struct *task;
- /* serialize SIGKILL and vhost_task_stop calls */
- struct mutex exit_mutex;
};
+static DEFINE_MUTEX(exit_mutex); //serialize SIGKILL and vhost_task_stop calls
+
static int vhost_task_fn(void *data)
{
struct vhost_task *vtsk = data;
@@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
schedule();
}
- mutex_lock(&vtsk->exit_mutex);
+ mutex_lock(&exit_mutex);
/*
* If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
* When the vhost layer has called vhost_task_stop it's already stopped
@@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
vtsk->handle_sigkill(vtsk->data);
}
complete(&vtsk->exited);
- mutex_unlock(&vtsk->exit_mutex);
+ mutex_unlock(&exit_mutex);
do_exit(0);
}
@@ -88,12 +88,12 @@ EXPORT_SYMBOL_GPL(vhost_task_wake);
*/
void vhost_task_stop(struct vhost_task *vtsk)
{
- mutex_lock(&vtsk->exit_mutex);
+ mutex_lock(&exit_mutex);
if (!test_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags)) {
set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
vhost_task_wake(vtsk);
}
- mutex_unlock(&vtsk->exit_mutex);
+ mutex_unlock(&exit_mutex);
/*
* Make sure vhost_task_fn is no longer accessing the vhost_task before
@@ -135,7 +135,6 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
if (!vtsk)
return NULL;
init_completion(&vtsk->exited);
- mutex_init(&vtsk->exit_mutex);
vtsk->data = arg;
vtsk->fn = fn;
vtsk->handle_sigkill = handle_sigkill;
--
2.43.0
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-04-30 13:05 ` [PATCH next] vhost_task: after freeing vhost_task it should not be accessed " Edward Adam Davis
@ 2024-04-30 16:23 ` Mike Christie
2024-04-30 18:06 ` Michael S. Tsirkin
2024-05-01 0:15 ` Hillf Danton
0 siblings, 2 replies; 27+ messages in thread
From: Mike Christie @ 2024-04-30 16:23 UTC (permalink / raw)
To: Edward Adam Davis, syzbot+98edc2df894917b3431f
Cc: jasowang, kvm, linux-kernel, mst, netdev, syzkaller-bugs,
virtualization
On 4/30/24 8:05 AM, Edward Adam Davis wrote:
> static int vhost_task_fn(void *data)
> {
> struct vhost_task *vtsk = data;
> @@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
> schedule();
> }
>
> - mutex_lock(&vtsk->exit_mutex);
> + mutex_lock(&exit_mutex);
> /*
> * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
> * When the vhost layer has called vhost_task_stop it's already stopped
> @@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
> vtsk->handle_sigkill(vtsk->data);
> }
> complete(&vtsk->exited);
> - mutex_unlock(&vtsk->exit_mutex);
> + mutex_unlock(&exit_mutex);
>
Edward, thanks for the patch. I think though I just needed to swap the
order of the calls above.
Instead of:
complete(&vtsk->exited);
mutex_unlock(&vtsk->exit_mutex);
it should have been:
mutex_unlock(&vtsk->exit_mutex);
complete(&vtsk->exited);
If my analysis is correct, then Michael do you want me to resubmit a
patch on top of your vhost branch or resubmit the entire patchset?
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-04-30 16:23 ` Mike Christie
@ 2024-04-30 18:06 ` Michael S. Tsirkin
2024-05-01 0:15 ` Hillf Danton
1 sibling, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-04-30 18:06 UTC (permalink / raw)
To: Mike Christie
Cc: Edward Adam Davis, syzbot+98edc2df894917b3431f, jasowang, kvm,
linux-kernel, netdev, syzkaller-bugs, virtualization
On Tue, Apr 30, 2024 at 11:23:04AM -0500, Mike Christie wrote:
> On 4/30/24 8:05 AM, Edward Adam Davis wrote:
> > static int vhost_task_fn(void *data)
> > {
> > struct vhost_task *vtsk = data;
> > @@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
> > schedule();
> > }
> >
> > - mutex_lock(&vtsk->exit_mutex);
> > + mutex_lock(&exit_mutex);
> > /*
> > * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
> > * When the vhost layer has called vhost_task_stop it's already stopped
> > @@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
> > vtsk->handle_sigkill(vtsk->data);
> > }
> > complete(&vtsk->exited);
> > - mutex_unlock(&vtsk->exit_mutex);
> > + mutex_unlock(&exit_mutex);
> >
>
> Edward, thanks for the patch. I think though I just needed to swap the
> order of the calls above.
>
> Instead of:
>
> complete(&vtsk->exited);
> mutex_unlock(&vtsk->exit_mutex);
>
> it should have been:
>
> mutex_unlock(&vtsk->exit_mutex);
> complete(&vtsk->exited);
>
> If my analysis is correct, then Michael do you want me to resubmit a
> patch on top of your vhost branch or resubmit the entire patchset?
Resubmit all please.
--
MST
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-04-30 16:23 ` Mike Christie
2024-04-30 18:06 ` Michael S. Tsirkin
@ 2024-05-01 0:15 ` Hillf Danton
2024-05-01 1:01 ` Mike Christie
2024-05-01 6:01 ` Michael S. Tsirkin
1 sibling, 2 replies; 27+ messages in thread
From: Hillf Danton @ 2024-05-01 0:15 UTC (permalink / raw)
To: Mike Christie
Cc: Michael S. Tsirkin, Edward Adam Davis,
syzbot+98edc2df894917b3431f, jasowang, kvm, linux-kernel, netdev,
syzkaller-bugs, virtualization
On Tue, Apr 30, 2024 at 11:23:04AM -0500, Mike Christie wrote:
> On 4/30/24 8:05 AM, Edward Adam Davis wrote:
> > static int vhost_task_fn(void *data)
> > {
> > struct vhost_task *vtsk = data;
> > @@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
> > schedule();
> > }
> >
> > - mutex_lock(&vtsk->exit_mutex);
> > + mutex_lock(&exit_mutex);
> > /*
> > * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
> > * When the vhost layer has called vhost_task_stop it's already stopped
> > @@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
> > vtsk->handle_sigkill(vtsk->data);
> > }
> > complete(&vtsk->exited);
> > - mutex_unlock(&vtsk->exit_mutex);
> > + mutex_unlock(&exit_mutex);
> >
>
> Edward, thanks for the patch. I think though I just needed to swap the
> order of the calls above.
>
> Instead of:
>
> complete(&vtsk->exited);
> mutex_unlock(&vtsk->exit_mutex);
>
> it should have been:
>
> mutex_unlock(&vtsk->exit_mutex);
> complete(&vtsk->exited);
JFYI Edward did it [1]
[1] https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
>
> If my analysis is correct, then Michael do you want me to resubmit a
> patch on top of your vhost branch or resubmit the entire patchset?
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 0:15 ` Hillf Danton
@ 2024-05-01 1:01 ` Mike Christie
2024-05-01 5:52 ` Michael S. Tsirkin
2024-05-01 6:01 ` Michael S. Tsirkin
1 sibling, 1 reply; 27+ messages in thread
From: Mike Christie @ 2024-05-01 1:01 UTC (permalink / raw)
To: Hillf Danton
Cc: Michael S. Tsirkin, Edward Adam Davis,
syzbot+98edc2df894917b3431f, jasowang, kvm, linux-kernel, netdev,
syzkaller-bugs, virtualization
On 4/30/24 7:15 PM, Hillf Danton wrote:
> On Tue, Apr 30, 2024 at 11:23:04AM -0500, Mike Christie wrote:
>> On 4/30/24 8:05 AM, Edward Adam Davis wrote:
>>> static int vhost_task_fn(void *data)
>>> {
>>> struct vhost_task *vtsk = data;
>>> @@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
>>> schedule();
>>> }
>>>
>>> - mutex_lock(&vtsk->exit_mutex);
>>> + mutex_lock(&exit_mutex);
>>> /*
>>> * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
>>> * When the vhost layer has called vhost_task_stop it's already stopped
>>> @@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
>>> vtsk->handle_sigkill(vtsk->data);
>>> }
>>> complete(&vtsk->exited);
>>> - mutex_unlock(&vtsk->exit_mutex);
>>> + mutex_unlock(&exit_mutex);
>>>
>>
>> Edward, thanks for the patch. I think though I just needed to swap the
>> order of the calls above.
>>
>> Instead of:
>>
>> complete(&vtsk->exited);
>> mutex_unlock(&vtsk->exit_mutex);
>>
>> it should have been:
>>
>> mutex_unlock(&vtsk->exit_mutex);
>> complete(&vtsk->exited);
>
> JFYI Edward did it [1]
>
> [1] https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
Thanks.
I tested the code with that change and it no longer triggers the UAF.
I've fixed up the original patch that had the bug and am going to
resubmit the patchset like how Michael requested.
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 1:01 ` Mike Christie
@ 2024-05-01 5:52 ` Michael S. Tsirkin
0 siblings, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-05-01 5:52 UTC (permalink / raw)
To: Mike Christie
Cc: Hillf Danton, Edward Adam Davis, syzbot+98edc2df894917b3431f,
jasowang, kvm, linux-kernel, netdev, syzkaller-bugs,
virtualization
On Tue, Apr 30, 2024 at 08:01:11PM -0500, Mike Christie wrote:
> On 4/30/24 7:15 PM, Hillf Danton wrote:
> > On Tue, Apr 30, 2024 at 11:23:04AM -0500, Mike Christie wrote:
> >> On 4/30/24 8:05 AM, Edward Adam Davis wrote:
> >>> static int vhost_task_fn(void *data)
> >>> {
> >>> struct vhost_task *vtsk = data;
> >>> @@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
> >>> schedule();
> >>> }
> >>>
> >>> - mutex_lock(&vtsk->exit_mutex);
> >>> + mutex_lock(&exit_mutex);
> >>> /*
> >>> * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
> >>> * When the vhost layer has called vhost_task_stop it's already stopped
> >>> @@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
> >>> vtsk->handle_sigkill(vtsk->data);
> >>> }
> >>> complete(&vtsk->exited);
> >>> - mutex_unlock(&vtsk->exit_mutex);
> >>> + mutex_unlock(&exit_mutex);
> >>>
> >>
> >> Edward, thanks for the patch. I think though I just needed to swap the
> >> order of the calls above.
> >>
> >> Instead of:
> >>
> >> complete(&vtsk->exited);
> >> mutex_unlock(&vtsk->exit_mutex);
> >>
> >> it should have been:
> >>
> >> mutex_unlock(&vtsk->exit_mutex);
> >> complete(&vtsk->exited);
> >
> > JFYI Edward did it [1]
> >
> > [1] https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
>
> Thanks.
>
> I tested the code with that change and it no longer triggers the UAF.
Weird, but syzkaller said that yes, it triggers.
Compare
000000000000dcc0ca06174e65d4@google.com
which tests the order
mutex_unlock(&vtsk->exit_mutex);
complete(&vtsk->exited);
that you like and says it triggers
and
00000000000097bda906175219bc@google.com
which says it does not trigger.
Whatever you do, please send it to syzkaller in the original
thread, and then when you post, please include the syzkaller report.
Given this gets confusing I'm fine with just a fixup patch,
and note in the commit log where I should squash it.
> I've fixed up the original patch that had the bug and am going to
> resubmit the patchset like how Michael requested.
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 0:15 ` Hillf Danton
2024-05-01 1:01 ` Mike Christie
@ 2024-05-01 6:01 ` Michael S. Tsirkin
2024-05-01 7:50 ` Hillf Danton
1 sibling, 1 reply; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-05-01 6:01 UTC (permalink / raw)
To: Hillf Danton
Cc: Mike Christie, Edward Adam Davis, syzbot+98edc2df894917b3431f,
jasowang, kvm, linux-kernel, netdev, syzkaller-bugs,
virtualization
On Wed, May 01, 2024 at 08:15:44AM +0800, Hillf Danton wrote:
> On Tue, Apr 30, 2024 at 11:23:04AM -0500, Mike Christie wrote:
> > On 4/30/24 8:05 AM, Edward Adam Davis wrote:
> > > static int vhost_task_fn(void *data)
> > > {
> > > struct vhost_task *vtsk = data;
> > > @@ -51,7 +51,7 @@ static int vhost_task_fn(void *data)
> > > schedule();
> > > }
> > >
> > > - mutex_lock(&vtsk->exit_mutex);
> > > + mutex_lock(&exit_mutex);
> > > /*
> > > * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
> > > * When the vhost layer has called vhost_task_stop it's already stopped
> > > @@ -62,7 +62,7 @@ static int vhost_task_fn(void *data)
> > > vtsk->handle_sigkill(vtsk->data);
> > > }
> > > complete(&vtsk->exited);
> > > - mutex_unlock(&vtsk->exit_mutex);
> > > + mutex_unlock(&exit_mutex);
> > >
> >
> > Edward, thanks for the patch. I think though I just needed to swap the
> > order of the calls above.
> >
> > Instead of:
> >
> > complete(&vtsk->exited);
> > mutex_unlock(&vtsk->exit_mutex);
> >
> > it should have been:
> >
> > mutex_unlock(&vtsk->exit_mutex);
> > complete(&vtsk->exited);
>
> JFYI Edward did it [1]
>
> [1] https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
and then it failed testing.
> >
> > If my analysis is correct, then Michael do you want me to resubmit a
> > patch on top of your vhost branch or resubmit the entire patchset?
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 6:01 ` Michael S. Tsirkin
@ 2024-05-01 7:50 ` Hillf Danton
2024-05-01 15:57 ` Mike Christie
0 siblings, 1 reply; 27+ messages in thread
From: Hillf Danton @ 2024-05-01 7:50 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Mike Christie, Edward Adam Davis, syzbot+98edc2df894917b3431f,
jasowang, kvm, linux-kernel, netdev, syzkaller-bugs,
virtualization
On Wed, 1 May 2024 02:01:20 -0400 Michael S. Tsirkin <mst@redhat.com>
>
> and then it failed testing.
>
So did my patch [1] but then the reason was spotted [2,3]
[1] https://lore.kernel.org/lkml/20240430110209.4310-1-hdanton@sina.com/
[2] https://lore.kernel.org/lkml/20240430225005.4368-1-hdanton@sina.com/
[3] https://lore.kernel.org/lkml/000000000000a7f8470617589ff2@google.com/
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 7:50 ` Hillf Danton
@ 2024-05-01 15:57 ` Mike Christie
2024-05-01 16:04 ` Michael S. Tsirkin
2024-05-01 16:15 ` Michael S. Tsirkin
0 siblings, 2 replies; 27+ messages in thread
From: Mike Christie @ 2024-05-01 15:57 UTC (permalink / raw)
To: Hillf Danton, Michael S. Tsirkin
Cc: Edward Adam Davis, syzbot+98edc2df894917b3431f, jasowang, kvm,
linux-kernel, netdev, syzkaller-bugs, virtualization
On 5/1/24 2:50 AM, Hillf Danton wrote:
> On Wed, 1 May 2024 02:01:20 -0400 Michael S. Tsirkin <mst@redhat.com>
>>
>> and then it failed testing.
>>
> So did my patch [1] but then the reason was spotted [2,3]
>
> [1] https://lore.kernel.org/lkml/20240430110209.4310-1-hdanton@sina.com/
> [2] https://lore.kernel.org/lkml/20240430225005.4368-1-hdanton@sina.com/
> [3] https://lore.kernel.org/lkml/000000000000a7f8470617589ff2@google.com/
Just to make sure I understand the conclusion.
Edward's patch that just swaps the order of the calls:
https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
fixes the UAF. I tested the same in my setup. However, when you guys tested it
with syzbot, it also triggered a softirq/RCU warning.
The softirq/RCU part of the issue is fixed with this commit:
https://lore.kernel.org/all/20240427102808.29356-1-qiang.zhang1211@gmail.com/
commit 1dd1eff161bd55968d3d46bc36def62d71fb4785
Author: Zqiang <qiang.zhang1211@gmail.com>
Date: Sat Apr 27 18:28:08 2024 +0800
softirq: Fix suspicious RCU usage in __do_softirq()
The problem was that I was testing with -next master which has that patch.
It looks like you guys were testing against bb7a2467e6be which didn't have
the patch, and so that's why you guys still hit the softirq/RCU issue. Later
when you added that patch to your patch, it worked with syzbot.
So is it safe to assume that the softirq/RCU patch above will be upstream
when the vhost changes go in or is there a tag I need to add to my patches?
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 15:57 ` Mike Christie
@ 2024-05-01 16:04 ` Michael S. Tsirkin
2024-05-01 16:15 ` Michael S. Tsirkin
1 sibling, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-05-01 16:04 UTC (permalink / raw)
To: Mike Christie
Cc: Hillf Danton, Edward Adam Davis, syzbot+98edc2df894917b3431f,
jasowang, kvm, linux-kernel, netdev, syzkaller-bugs,
virtualization
On Wed, May 01, 2024 at 10:57:38AM -0500, Mike Christie wrote:
> On 5/1/24 2:50 AM, Hillf Danton wrote:
> > On Wed, 1 May 2024 02:01:20 -0400 Michael S. Tsirkin <mst@redhat.com>
> >>
> >> and then it failed testing.
> >>
> > So did my patch [1] but then the reason was spotted [2,3]
> >
> > [1] https://lore.kernel.org/lkml/20240430110209.4310-1-hdanton@sina.com/
> > [2] https://lore.kernel.org/lkml/20240430225005.4368-1-hdanton@sina.com/
> > [3] https://lore.kernel.org/lkml/000000000000a7f8470617589ff2@google.com/
>
> Just to make sure I understand the conclusion.
>
> Edward's patch that just swaps the order of the calls:
>
> https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
>
> fixes the UAF. I tested the same in my setup. However, when you guys tested it
> with syzbot, it also triggered a softirq/RCU warning.
>
> The softirq/RCU part of the issue is fixed with this commit:
>
> https://lore.kernel.org/all/20240427102808.29356-1-qiang.zhang1211@gmail.com/
>
> commit 1dd1eff161bd55968d3d46bc36def62d71fb4785
> Author: Zqiang <qiang.zhang1211@gmail.com>
> Date: Sat Apr 27 18:28:08 2024 +0800
>
> softirq: Fix suspicious RCU usage in __do_softirq()
>
> The problem was that I was testing with -next master which has that patch.
> It looks like you guys were testing against bb7a2467e6be which didn't have
> the patch, and so that's why you guys still hit the softirq/RCU issue. Later
> when you added that patch to your patch, it worked with syzbot.
>
> So is it safe to assume that the softirq/RCU patch above will be upstream
> when the vhost changes go in or is there a tag I need to add to my patches?
Two points:
- I do not want bisect broken. If you depend on this patch either I pick
it too before your patch, or we defer until 1dd1eff161bd55968d3d46bc36def62d71fb4785
is merged. You can also ask for that patch to be merged in this cycle.
- Do not assume - pls push somewhere a hash based on vhost that syzbot can test
and confirm all is well. Thanks!
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH next] vhost_task: after freeing vhost_task it should not be accessed in vhost_task_fn
2024-05-01 15:57 ` Mike Christie
2024-05-01 16:04 ` Michael S. Tsirkin
@ 2024-05-01 16:15 ` Michael S. Tsirkin
1 sibling, 0 replies; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-05-01 16:15 UTC (permalink / raw)
To: Mike Christie
Cc: Hillf Danton, Edward Adam Davis, syzbot+98edc2df894917b3431f,
jasowang, kvm, linux-kernel, netdev, syzkaller-bugs,
virtualization
On Wed, May 01, 2024 at 10:57:38AM -0500, Mike Christie wrote:
> On 5/1/24 2:50 AM, Hillf Danton wrote:
> > On Wed, 1 May 2024 02:01:20 -0400 Michael S. Tsirkin <mst@redhat.com>
> >>
> >> and then it failed testing.
> >>
> > So did my patch [1] but then the reason was spotted [2,3]
> >
> > [1] https://lore.kernel.org/lkml/20240430110209.4310-1-hdanton@sina.com/
> > [2] https://lore.kernel.org/lkml/20240430225005.4368-1-hdanton@sina.com/
> > [3] https://lore.kernel.org/lkml/000000000000a7f8470617589ff2@google.com/
>
> Just to make sure I understand the conclusion.
>
> Edward's patch that just swaps the order of the calls:
>
> https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
>
> fixes the UAF. I tested the same in my setup. However, when you guys tested it
> with syzbot, it also triggered a softirq/RCU warning.
>
> The softirq/RCU part of the issue is fixed with this commit:
>
> https://lore.kernel.org/all/20240427102808.29356-1-qiang.zhang1211@gmail.com/
>
> commit 1dd1eff161bd55968d3d46bc36def62d71fb4785
> Author: Zqiang <qiang.zhang1211@gmail.com>
> Date: Sat Apr 27 18:28:08 2024 +0800
>
> softirq: Fix suspicious RCU usage in __do_softirq()
>
> The problem was that I was testing with -next master which has that patch.
> It looks like you guys were testing against bb7a2467e6be which didn't have
> the patch, and so that's why you guys still hit the softirq/RCU issue. Later
> when you added that patch to your patch, it worked with syzbot.
>
> So is it safe to assume that the softirq/RCU patch above will be upstream
> when the vhost changes go in or is there a tag I need to add to my patches?
That patch is upstream now. I rebased and asked syzbot to test
https://lore.kernel.org/lkml/tencent_546DA49414E876EEBECF2C78D26D242EE50A@qq.com/
on top.
If that passes I will squash.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
` (3 preceding siblings ...)
2024-04-30 13:05 ` [PATCH next] vhost_task: after freeing vhost_task it should not be accessed " Edward Adam Davis
@ 2024-04-30 22:50 ` Hillf Danton
2024-04-30 23:21 ` syzbot
2024-05-01 3:44 ` Edward Adam Davis
` (2 subsequent siblings)
7 siblings, 1 reply; 27+ messages in thread
From: Hillf Danton @ 2024-04-30 22:50 UTC (permalink / raw)
To: syzbot; +Cc: linux-kernel, syzkaller-bugs
On Tue, 30 Apr 2024 01:25:26 -0700
> syzbot found the following issue on:
>
> HEAD commit: bb7a2467e6be Add linux-next specific files for 20240426
> git tree: linux-next
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=16c30028980000
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
--- x/kernel/vhost_task.c
+++ y/kernel/vhost_task.c
@@ -100,6 +100,8 @@ void vhost_task_stop(struct vhost_task *
* freeing it below.
*/
wait_for_completion(&vtsk->exited);
+ mutex_lock(&vtsk->exit_mutex);
+ mutex_unlock(&vtsk->exit_mutex);
kfree(vtsk);
}
EXPORT_SYMBOL_GPL(vhost_task_stop);
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -508,7 +508,7 @@ static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
-asmlinkage __visible void __softirq_entry __do_softirq(void)
+static void handle_softirqs(bool ksirqd)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
@@ -563,8 +563,7 @@ restart:
pending >>= softirq_bit;
}
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
- __this_cpu_read(ksoftirqd) == current)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
rcu_softirq_qs();
local_irq_disable();
@@ -584,6 +583,11 @@ restart:
current_restore_flags(old_flags, PF_MEMALLOC);
}
+asmlinkage __visible void __softirq_entry __do_softirq(void)
+{
+ handle_softirqs(false);
+}
+
/**
* irq_enter_rcu - Enter an interrupt context with RCU watching
*/
@@ -921,7 +925,7 @@ static void run_ksoftirqd(unsigned int cpu)
* We can safely run softirq on inline stack, as we are not deep
* in the task stack here.
*/
- __do_softirq();
+ handle_softirqs(true);
ksoftirqd_run_end();
cond_resched();
return;
--
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
` (4 preceding siblings ...)
2024-04-30 22:50 ` [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read " Hillf Danton
@ 2024-05-01 3:44 ` Edward Adam Davis
2024-05-01 10:13 ` syzbot
2024-05-01 16:12 ` Michael S. Tsirkin
2024-05-05 3:07 ` Edward Adam Davis
7 siblings, 1 reply; 27+ messages in thread
From: Edward Adam Davis @ 2024-05-01 3:44 UTC (permalink / raw)
To: syzbot+98edc2df894917b3431f; +Cc: linux-kernel, syzkaller-bugs
please test uaf in vhost_task_fn
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index 48c289947b99..8800f5acc007 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -61,8 +61,8 @@ static int vhost_task_fn(void *data)
set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags);
vtsk->handle_sigkill(vtsk->data);
}
- complete(&vtsk->exited);
mutex_unlock(&vtsk->exit_mutex);
+ complete(&vtsk->exited);
do_exit(0);
}
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -508,7 +508,7 @@ static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
-asmlinkage __visible void __softirq_entry __do_softirq(void)
+static void handle_softirqs(bool ksirqd)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
@@ -563,8 +563,7 @@ restart:
pending >>= softirq_bit;
}
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
- __this_cpu_read(ksoftirqd) == current)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
rcu_softirq_qs();
local_irq_disable();
@@ -584,6 +583,11 @@ restart:
current_restore_flags(old_flags, PF_MEMALLOC);
}
+asmlinkage __visible void __softirq_entry __do_softirq(void)
+{
+ handle_softirqs(false);
+}
+
/**
* irq_enter_rcu - Enter an interrupt context with RCU watching
*/
@@ -921,7 +925,7 @@ static void run_ksoftirqd(unsigned int cpu)
* We can safely run softirq on inline stack, as we are not deep
* in the task stack here.
*/
- __do_softirq();
+ handle_softirqs(true);
ksoftirqd_run_end();
cond_resched();
return;
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
` (5 preceding siblings ...)
2024-05-01 3:44 ` Edward Adam Davis
@ 2024-05-01 16:12 ` Michael S. Tsirkin
2024-05-01 16:56 ` syzbot
2024-05-05 3:07 ` Edward Adam Davis
7 siblings, 1 reply; 27+ messages in thread
From: Michael S. Tsirkin @ 2024-05-01 16:12 UTC (permalink / raw)
To: syzbot
Cc: jasowang, kvm, linux-kernel, michael.christie, netdev,
syzkaller-bugs, virtualization
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git f138e94c1f0dbeae721917694fb2203446a68ea9
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-05-01 16:12 ` Michael S. Tsirkin
@ 2024-05-01 16:56 ` syzbot
0 siblings, 0 replies; 27+ messages in thread
From: syzbot @ 2024-05-01 16:56 UTC (permalink / raw)
To: jasowang, kvm, linux-kernel, michael.christie, mst, netdev,
syzkaller-bugs, virtualization
Hello,
syzbot has tested the proposed patch and the reproducer did not trigger any issue:
Reported-and-tested-by: syzbot+98edc2df894917b3431f@syzkaller.appspotmail.com
Tested on:
commit: f138e94c KASAN: slab-use-after-free Read in vhost_task..
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
console output: https://syzkaller.appspot.com/x/log.txt?x=10a152a7180000
kernel config: https://syzkaller.appspot.com/x/.config?x=3714fc09f933e505
dashboard link: https://syzkaller.appspot.com/bug?extid=98edc2df894917b3431f
compiler: Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40
Note: no patches were applied.
Note: testing is done by a robot and is best-effort only.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn
2024-04-30 8:25 [syzbot] [net?] [virt?] [kvm?] KASAN: slab-use-after-free Read in vhost_task_fn syzbot
` (6 preceding siblings ...)
2024-05-01 16:12 ` Michael S. Tsirkin
@ 2024-05-05 3:07 ` Edward Adam Davis
2024-05-05 3:40 ` syzbot
7 siblings, 1 reply; 27+ messages in thread
From: Edward Adam Davis @ 2024-05-05 3:07 UTC (permalink / raw)
To: syzbot+98edc2df894917b3431f; +Cc: linux-kernel, syzkaller-bugs
please test uaf in vhost_task_fn
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git bb7a2467e6be
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b315b21fb28c..02582017759a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -508,7 +508,7 @@ static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
-asmlinkage __visible void __softirq_entry __do_softirq(void)
+static void handle_softirqs(bool ksirqd)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
@@ -563,8 +563,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
pending >>= softirq_bit;
}
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
- __this_cpu_read(ksoftirqd) == current)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
rcu_softirq_qs();
local_irq_disable();
@@ -584,6 +583,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
current_restore_flags(old_flags, PF_MEMALLOC);
}
+asmlinkage __visible void __softirq_entry __do_softirq(void)
+{
+ handle_softirqs(false);
+}
+
/**
* irq_enter_rcu - Enter an interrupt context with RCU watching
*/
@@ -921,7 +925,7 @@ static void run_ksoftirqd(unsigned int cpu)
* We can safely run softirq on inline stack, as we are not deep
* in the task stack here.
*/
- __do_softirq();
+ handle_softirqs(true);
ksoftirqd_run_end();
cond_resched();
return;
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index 48c289947b99..8800f5acc007 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -61,8 +61,8 @@ static int vhost_task_fn(void *data)
set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags);
vtsk->handle_sigkill(vtsk->data);
}
- complete(&vtsk->exited);
mutex_unlock(&vtsk->exit_mutex);
+ complete(&vtsk->exited);
do_exit(0);
}
^ permalink raw reply related [flat|nested] 27+ messages in thread