From: Kunwu Chan <kunwu.chan@linux.dev>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, eddyz87@gmail.com, song@kernel.org,
yonghong.song@linux.dev, john.fastabend@gmail.com,
kpsingh@kernel.org, sdf@fomichev.me, haoluo@google.com,
jolsa@kernel.org, bigeasy@linutronix.de, clrkwllms@kernel.org,
rostedt@goodmis.org, tglx@linutronix.de
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-rt-devel@lists.linux.dev, Kunwu Chan <chentao@kylinos.cn>,
syzbot+b506de56cbbb63148c33@syzkaller.appspotmail.com
Subject: [PATCH] bpf: Convert lpm_trie::lock to 'raw_spinlock_t'
Date: Fri, 8 Nov 2024 14:32:14 +0800 [thread overview]
Message-ID: <20241108063214.578120-1-kunwu.chan@linux.dev> (raw)
From: Kunwu Chan <chentao@kylinos.cn>
When PREEMPT_RT is enabled, 'spinlock_t' becomes preemptible
and bpf program has owned a raw_spinlock under a interrupt handler,
which results in invalid lock acquire context.
[ BUG: Invalid wait context ]
6.12.0-rc5-next-20241031-syzkaller #0 Not tainted
-----------------------------
swapper/0/0 is trying to lock:
ffff8880261e7a00 (&trie->lock){....}-{3:3},
at: trie_delete_elem+0x96/0x6a0 kernel/bpf/lpm_trie.c:462
other info that might help us debug this:
context-{3:3}
5 locks held by swapper/0/0:
#0: ffff888020bb75c8 (&vp_dev->lock){-...}-{3:3},
at: vp_vring_interrupt drivers/virtio/virtio_pci_common.c:80 [inline]
#0: ffff888020bb75c8 (&vp_dev->lock){-...}-{3:3},
at: vp_interrupt+0x142/0x200 drivers/virtio/virtio_pci_common.c:113
#1: ffff88814174a120 (&vb->stop_update_lock){-...}-{3:3},
at: spin_lock include/linux/spinlock.h:351 [inline]
#1: ffff88814174a120 (&vb->stop_update_lock){-...}-{3:3},
at: stats_request+0x6f/0x230 drivers/virtio/virtio_balloon.c:438
#2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline]
#2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_read_lock include/linux/rcupdate.h:849 [inline]
#2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: __queue_work+0x199/0xf50 kernel/workqueue.c:2259
#3: ffff8880b863dd18 (&pool->lock){-.-.}-{2:2},
at: __queue_work+0x759/0xf50
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline]
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_read_lock include/linux/rcupdate.h:849 [inline]
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: __bpf_trace_run kernel/trace/bpf_trace.c:2339 [inline]
#4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: bpf_trace_run1+0x1d6/0x520 kernel/trace/bpf_trace.c:2380
stack backtrace:
CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted
6.12.0-rc5-next-20241031-syzkaller #0
Hardware name: Google Compute Engine/Google Compute Engine,
BIOS Google 09/13/2024
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
print_lock_invalid_wait_context kernel/locking/lockdep.c:4826 [inline]
check_wait_context kernel/locking/lockdep.c:4898 [inline]
__lock_acquire+0x15a8/0x2100 kernel/locking/lockdep.c:5176
lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162
trie_delete_elem+0x96/0x6a0 kernel/bpf/lpm_trie.c:462
bpf_prog_2c29ac5cdc6b1842+0x43/0x47
bpf_dispatcher_nop_func include/linux/bpf.h:1290 [inline]
__bpf_prog_run include/linux/filter.h:701 [inline]
bpf_prog_run include/linux/filter.h:708 [inline]
__bpf_trace_run kernel/trace/bpf_trace.c:2340 [inline]
bpf_trace_run1+0x2ca/0x520 kernel/trace/bpf_trace.c:2380
trace_workqueue_activate_work+0x186/0x1f0 include/trace/events/workqueue.h:59
__queue_work+0xc7b/0xf50 kernel/workqueue.c:2338
queue_work_on+0x1c2/0x380 kernel/workqueue.c:2390
queue_work include/linux/workqueue.h:662 [inline]
stats_request+0x1a3/0x230 drivers/virtio/virtio_balloon.c:441
vring_interrupt+0x21d/0x380 drivers/virtio/virtio_ring.c:2595
vp_vring_interrupt drivers/virtio/virtio_pci_common.c:82 [inline]
vp_interrupt+0x192/0x200 drivers/virtio/virtio_pci_common.c:113
__handle_irq_event_percpu+0x29a/0xa80 kernel/irq/handle.c:158
handle_irq_event_percpu kernel/irq/handle.c:193 [inline]
handle_irq_event+0x89/0x1f0 kernel/irq/handle.c:210
handle_fasteoi_irq+0x48a/0xae0 kernel/irq/chip.c:720
generic_handle_irq_desc include/linux/irqdesc.h:173 [inline]
handle_irq arch/x86/kernel/irq.c:247 [inline]
call_irq_handler arch/x86/kernel/irq.c:259 [inline]
__common_interrupt+0x136/0x230 arch/x86/kernel/irq.c:285
common_interrupt+0xb4/0xd0 arch/x86/kernel/irq.c:278
</IRQ>
Reported-by: syzbot+b506de56cbbb63148c33@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/bpf/6723db4a.050a0220.35b515.0168.GAE@google.com/
Fixes: 66150d0dde03 ("bpf, lpm: Make locking RT friendly")
Signed-off-by: Kunwu Chan <chentao@kylinos.cn>
---
kernel/bpf/lpm_trie.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 9b60eda0f727..373cdcfa0505 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -35,7 +35,7 @@ struct lpm_trie {
size_t n_entries;
size_t max_prefixlen;
size_t data_size;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
/* This trie implements a longest prefix match algorithm that can be used to
@@ -330,7 +330,7 @@ static long trie_update_elem(struct bpf_map *map,
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
- spin_lock_irqsave(&trie->lock, irq_flags);
+ raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Allocate and fill a new node */
@@ -437,7 +437,7 @@ static long trie_update_elem(struct bpf_map *map,
kfree(im_node);
}
- spin_unlock_irqrestore(&trie->lock, irq_flags);
+ raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_node, rcu);
return ret;
@@ -459,7 +459,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
- spin_lock_irqsave(&trie->lock, irq_flags);
+ raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Walk the tree looking for an exact key/length match and keeping
* track of the path we traverse. We will need to know the node
@@ -535,7 +535,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
free_node = node;
out:
- spin_unlock_irqrestore(&trie->lock, irq_flags);
+ raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_parent, rcu);
kfree_rcu(free_node, rcu);
@@ -581,7 +581,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key_u8, data);
trie->max_prefixlen = trie->data_size * 8;
- spin_lock_init(&trie->lock);
+ raw_spin_lock_init(&trie->lock);
return &trie->map;
}
--
2.34.1
next reply other threads:[~2024-11-08 6:32 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-08 6:32 Kunwu Chan [this message]
2024-11-08 20:22 ` [PATCH] bpf: Convert lpm_trie::lock to 'raw_spinlock_t' Alexei Starovoitov
2024-11-09 2:46 ` Hou Tao
2024-11-10 0:04 ` Alexei Starovoitov
2024-11-10 2:08 ` Hou Tao
2024-11-14 10:09 ` Sebastian Sewior
2024-11-12 15:08 ` Thomas Gleixner
2024-11-14 2:43 ` Kunwu Chan
2024-11-15 22:29 ` Thomas Gleixner
2024-11-16 9:21 ` Sebastian Andrzej Siewior
2024-11-16 16:01 ` Alexei Starovoitov
2024-11-16 16:15 ` Thomas Weißschuh
2024-11-16 16:42 ` Alexei Starovoitov
2024-11-17 1:24 ` Hou Tao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241108063214.578120-1-kunwu.chan@linux.dev \
--to=kunwu.chan@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bigeasy@linutronix.de \
--cc=bpf@vger.kernel.org \
--cc=chentao@kylinos.cn \
--cc=clrkwllms@kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rt-devel@lists.linux.dev \
--cc=martin.lau@linux.dev \
--cc=rostedt@goodmis.org \
--cc=sdf@fomichev.me \
--cc=song@kernel.org \
--cc=syzbot+b506de56cbbb63148c33@syzkaller.appspotmail.com \
--cc=tglx@linutronix.de \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.