All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kunwu Chan <kunwu.chan@linux.dev>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	martin.lau@linux.dev, eddyz87@gmail.com, song@kernel.org,
	yonghong.song@linux.dev, john.fastabend@gmail.com,
	kpsingh@kernel.org, sdf@fomichev.me, haoluo@google.com,
	jolsa@kernel.org, bigeasy@linutronix.de, clrkwllms@kernel.org,
	rostedt@goodmis.org, tglx@linutronix.de
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-rt-devel@lists.linux.dev, Kunwu Chan <chentao@kylinos.cn>,
	syzbot+b506de56cbbb63148c33@syzkaller.appspotmail.com
Subject: [PATCH] bpf: Convert lpm_trie::lock to 'raw_spinlock_t'
Date: Fri,  8 Nov 2024 14:32:14 +0800	[thread overview]
Message-ID: <20241108063214.578120-1-kunwu.chan@linux.dev> (raw)

From: Kunwu Chan <chentao@kylinos.cn>

When PREEMPT_RT is enabled, 'spinlock_t' becomes preemptible
and bpf program has owned a raw_spinlock under a interrupt handler,
which results in invalid lock acquire context.

[ BUG: Invalid wait context ]
6.12.0-rc5-next-20241031-syzkaller #0 Not tainted
-----------------------------
swapper/0/0 is trying to lock:
ffff8880261e7a00 (&trie->lock){....}-{3:3},
at: trie_delete_elem+0x96/0x6a0 kernel/bpf/lpm_trie.c:462
other info that might help us debug this:
context-{3:3}
5 locks held by swapper/0/0:
 #0: ffff888020bb75c8 (&vp_dev->lock){-...}-{3:3},
at: vp_vring_interrupt drivers/virtio/virtio_pci_common.c:80 [inline]
 #0: ffff888020bb75c8 (&vp_dev->lock){-...}-{3:3},
at: vp_interrupt+0x142/0x200 drivers/virtio/virtio_pci_common.c:113
 #1: ffff88814174a120 (&vb->stop_update_lock){-...}-{3:3},
at: spin_lock include/linux/spinlock.h:351 [inline]
 #1: ffff88814174a120 (&vb->stop_update_lock){-...}-{3:3},
at: stats_request+0x6f/0x230 drivers/virtio/virtio_balloon.c:438
 #2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline]
 #2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_read_lock include/linux/rcupdate.h:849 [inline]
 #2: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: __queue_work+0x199/0xf50 kernel/workqueue.c:2259
 #3: ffff8880b863dd18 (&pool->lock){-.-.}-{2:2},
at: __queue_work+0x759/0xf50
 #4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline]
 #4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: rcu_read_lock include/linux/rcupdate.h:849 [inline]
 #4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: __bpf_trace_run kernel/trace/bpf_trace.c:2339 [inline]
 #4: ffffffff8e939f20 (rcu_read_lock){....}-{1:3},
at: bpf_trace_run1+0x1d6/0x520 kernel/trace/bpf_trace.c:2380
stack backtrace:
CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted
6.12.0-rc5-next-20241031-syzkaller #0
Hardware name: Google Compute Engine/Google Compute Engine,
BIOS Google 09/13/2024
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
 print_lock_invalid_wait_context kernel/locking/lockdep.c:4826 [inline]
 check_wait_context kernel/locking/lockdep.c:4898 [inline]
 __lock_acquire+0x15a8/0x2100 kernel/locking/lockdep.c:5176
 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162
 trie_delete_elem+0x96/0x6a0 kernel/bpf/lpm_trie.c:462
 bpf_prog_2c29ac5cdc6b1842+0x43/0x47
 bpf_dispatcher_nop_func include/linux/bpf.h:1290 [inline]
 __bpf_prog_run include/linux/filter.h:701 [inline]
 bpf_prog_run include/linux/filter.h:708 [inline]
 __bpf_trace_run kernel/trace/bpf_trace.c:2340 [inline]
 bpf_trace_run1+0x2ca/0x520 kernel/trace/bpf_trace.c:2380
 trace_workqueue_activate_work+0x186/0x1f0 include/trace/events/workqueue.h:59
 __queue_work+0xc7b/0xf50 kernel/workqueue.c:2338
 queue_work_on+0x1c2/0x380 kernel/workqueue.c:2390
 queue_work include/linux/workqueue.h:662 [inline]
 stats_request+0x1a3/0x230 drivers/virtio/virtio_balloon.c:441
 vring_interrupt+0x21d/0x380 drivers/virtio/virtio_ring.c:2595
 vp_vring_interrupt drivers/virtio/virtio_pci_common.c:82 [inline]
 vp_interrupt+0x192/0x200 drivers/virtio/virtio_pci_common.c:113
 __handle_irq_event_percpu+0x29a/0xa80 kernel/irq/handle.c:158
 handle_irq_event_percpu kernel/irq/handle.c:193 [inline]
 handle_irq_event+0x89/0x1f0 kernel/irq/handle.c:210
 handle_fasteoi_irq+0x48a/0xae0 kernel/irq/chip.c:720
 generic_handle_irq_desc include/linux/irqdesc.h:173 [inline]
 handle_irq arch/x86/kernel/irq.c:247 [inline]
 call_irq_handler arch/x86/kernel/irq.c:259 [inline]
 __common_interrupt+0x136/0x230 arch/x86/kernel/irq.c:285
 common_interrupt+0xb4/0xd0 arch/x86/kernel/irq.c:278
 </IRQ>

Reported-by: syzbot+b506de56cbbb63148c33@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/bpf/6723db4a.050a0220.35b515.0168.GAE@google.com/
Fixes: 66150d0dde03 ("bpf, lpm: Make locking RT friendly")
Signed-off-by: Kunwu Chan <chentao@kylinos.cn>
---
 kernel/bpf/lpm_trie.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 9b60eda0f727..373cdcfa0505 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -35,7 +35,7 @@ struct lpm_trie {
 	size_t				n_entries;
 	size_t				max_prefixlen;
 	size_t				data_size;
-	spinlock_t			lock;
+	raw_spinlock_t			lock;
 };
 
 /* This trie implements a longest prefix match algorithm that can be used to
@@ -330,7 +330,7 @@ static long trie_update_elem(struct bpf_map *map,
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;
 
-	spin_lock_irqsave(&trie->lock, irq_flags);
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
 
 	/* Allocate and fill a new node */
 
@@ -437,7 +437,7 @@ static long trie_update_elem(struct bpf_map *map,
 		kfree(im_node);
 	}
 
-	spin_unlock_irqrestore(&trie->lock, irq_flags);
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
 	kfree_rcu(free_node, rcu);
 
 	return ret;
@@ -459,7 +459,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;
 
-	spin_lock_irqsave(&trie->lock, irq_flags);
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
 
 	/* Walk the tree looking for an exact key/length match and keeping
 	 * track of the path we traverse.  We will need to know the node
@@ -535,7 +535,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	free_node = node;
 
 out:
-	spin_unlock_irqrestore(&trie->lock, irq_flags);
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
 	kfree_rcu(free_parent, rcu);
 	kfree_rcu(free_node, rcu);
 
@@ -581,7 +581,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 			  offsetof(struct bpf_lpm_trie_key_u8, data);
 	trie->max_prefixlen = trie->data_size * 8;
 
-	spin_lock_init(&trie->lock);
+	raw_spin_lock_init(&trie->lock);
 
 	return &trie->map;
 }
-- 
2.34.1


             reply	other threads:[~2024-11-08  6:32 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-08  6:32 Kunwu Chan [this message]
2024-11-08 20:22 ` [PATCH] bpf: Convert lpm_trie::lock to 'raw_spinlock_t' Alexei Starovoitov
2024-11-09  2:46   ` Hou Tao
2024-11-10  0:04     ` Alexei Starovoitov
2024-11-10  2:08       ` Hou Tao
2024-11-14 10:09         ` Sebastian Sewior
2024-11-12 15:08 ` Thomas Gleixner
2024-11-14  2:43   ` Kunwu Chan
2024-11-15 22:29     ` Thomas Gleixner
2024-11-16  9:21       ` Sebastian Andrzej Siewior
2024-11-16 16:01         ` Alexei Starovoitov
2024-11-16 16:15           ` Thomas Weißschuh
2024-11-16 16:42             ` Alexei Starovoitov
2024-11-17  1:24               ` Hou Tao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241108063214.578120-1-kunwu.chan@linux.dev \
    --to=kunwu.chan@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=bpf@vger.kernel.org \
    --cc=chentao@kylinos.cn \
    --cc=clrkwllms@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-devel@lists.linux.dev \
    --cc=martin.lau@linux.dev \
    --cc=rostedt@goodmis.org \
    --cc=sdf@fomichev.me \
    --cc=song@kernel.org \
    --cc=syzbot+b506de56cbbb63148c33@syzkaller.appspotmail.com \
    --cc=tglx@linutronix.de \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.