From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
To: bpf@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Peter Zijlstra <peterz@infradead.org>,
Will Deacon <will@kernel.org>, Waiman Long <llong@redhat.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
"Paul E. McKenney" <paulmck@kernel.org>,
Tejun Heo <tj@kernel.org>, Barret Rhoden <brho@google.com>,
Josh Don <joshdon@google.com>, Dohyun Kim <dohyunkim@google.com>,
linux-arm-kernel@lists.infradead.org, kkd@meta.com,
kernel-team@meta.com
Subject: [PATCH bpf-next v4 21/25] bpf: Convert lpm_trie.c to rqspinlock
Date: Sat, 15 Mar 2025 21:05:37 -0700 [thread overview]
Message-ID: <20250316040541.108729-22-memxor@gmail.com> (raw)
In-Reply-To: <20250316040541.108729-1-memxor@gmail.com>
Convert all LPM trie usage of raw_spinlock to rqspinlock.
Note that rcu_dereference_protected in trie_delete_elem is switched over
to plain rcu_dereference, the RCU read lock should be held from BPF
program side or eBPF syscall path, and the trie->lock is just acquired
before the dereference. It is not clear the reason the protected variant
was used from the commit history, but the above reasoning makes sense so
switch over.
Closes: https://lore.kernel.org/lkml/000000000000adb08b061413919e@google.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
kernel/bpf/lpm_trie.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index e8a772e64324..be66d7e520e0 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -15,6 +15,7 @@
#include <net/ipv6.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
+#include <asm/rqspinlock.h>
#include <linux/bpf_mem_alloc.h>
/* Intermediate node */
@@ -36,7 +37,7 @@ struct lpm_trie {
size_t n_entries;
size_t max_prefixlen;
size_t data_size;
- raw_spinlock_t lock;
+ rqspinlock_t lock;
};
/* This trie implements a longest prefix match algorithm that can be used to
@@ -342,7 +343,9 @@ static long trie_update_elem(struct bpf_map *map,
if (!new_node)
return -ENOMEM;
- raw_spin_lock_irqsave(&trie->lock, irq_flags);
+ ret = raw_res_spin_lock_irqsave(&trie->lock, irq_flags);
+ if (ret)
+ goto out_free;
new_node->prefixlen = key->prefixlen;
RCU_INIT_POINTER(new_node->child[0], NULL);
@@ -356,8 +359,7 @@ static long trie_update_elem(struct bpf_map *map,
*/
slot = &trie->root;
- while ((node = rcu_dereference_protected(*slot,
- lockdep_is_held(&trie->lock)))) {
+ while ((node = rcu_dereference(*slot))) {
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
@@ -442,8 +444,8 @@ static long trie_update_elem(struct bpf_map *map,
rcu_assign_pointer(*slot, im_node);
out:
- raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
-
+ raw_res_spin_unlock_irqrestore(&trie->lock, irq_flags);
+out_free:
if (ret)
bpf_mem_cache_free(&trie->ma, new_node);
bpf_mem_cache_free_rcu(&trie->ma, free_node);
@@ -467,7 +469,9 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
if (key->prefixlen > trie->max_prefixlen)
return -EINVAL;
- raw_spin_lock_irqsave(&trie->lock, irq_flags);
+ ret = raw_res_spin_lock_irqsave(&trie->lock, irq_flags);
+ if (ret)
+ return ret;
/* Walk the tree looking for an exact key/length match and keeping
* track of the path we traverse. We will need to know the node
@@ -478,8 +482,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
trim = &trie->root;
trim2 = trim;
parent = NULL;
- while ((node = rcu_dereference_protected(
- *trim, lockdep_is_held(&trie->lock)))) {
+ while ((node = rcu_dereference(*trim))) {
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
@@ -543,7 +546,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
free_node = node;
out:
- raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+ raw_res_spin_unlock_irqrestore(&trie->lock, irq_flags);
bpf_mem_cache_free_rcu(&trie->ma, free_parent);
bpf_mem_cache_free_rcu(&trie->ma, free_node);
@@ -592,7 +595,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
offsetof(struct bpf_lpm_trie_key_u8, data);
trie->max_prefixlen = trie->data_size * 8;
- raw_spin_lock_init(&trie->lock);
+ raw_res_spin_lock_init(&trie->lock);
/* Allocate intermediate and leaf nodes from the same allocator */
leaf_size = sizeof(struct lpm_trie_node) + trie->data_size +
--
2.47.1
next prev parent reply other threads:[~2025-03-16 4:43 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-16 4:05 [PATCH bpf-next v4 00/25] Resilient Queued Spin Lock Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 01/25] locking: Move MCS struct definition to public header Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 02/25] locking: Move common qspinlock helpers to a private header Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 03/25] locking: Allow obtaining result of arch_mcs_spin_lock_contended Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 04/25] locking: Copy out qspinlock.c to kernel/bpf/rqspinlock.c Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 05/25] rqspinlock: Add rqspinlock.h header Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 06/25] rqspinlock: Drop PV and virtualization support Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 07/25] rqspinlock: Add support for timeouts Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 08/25] rqspinlock: Hardcode cond_acquire loops for arm64 Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 09/25] rqspinlock: Protect pending bit owners from stalls Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 10/25] rqspinlock: Protect waiters in queue " Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 11/25] rqspinlock: Protect waiters in trylock fallback " Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 12/25] rqspinlock: Add deadlock detection and recovery Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 13/25] rqspinlock: Add a test-and-set fallback Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 14/25] rqspinlock: Add basic support for CONFIG_PARAVIRT Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 15/25] rqspinlock: Add helper to print a splat on timeout or deadlock Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 16/25] rqspinlock: Add macros for rqspinlock usage Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 17/25] rqspinlock: Add entry to Makefile, MAINTAINERS Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 18/25] rqspinlock: Add locktorture support Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 19/25] bpf: Convert hashtab.c to rqspinlock Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 20/25] bpf: Convert percpu_freelist.c " Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` Kumar Kartikeya Dwivedi [this message]
2025-03-16 4:05 ` [PATCH bpf-next v4 22/25] bpf: Introduce rqspinlock kfuncs Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 23/25] bpf: Implement verifier support for rqspinlock Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 24/25] bpf: Maintain FIFO property for rqspinlock unlock Kumar Kartikeya Dwivedi
2025-03-16 4:05 ` [PATCH bpf-next v4 25/25] selftests/bpf: Add tests for rqspinlock Kumar Kartikeya Dwivedi
2025-03-18 20:32 ` [PATCH bpf-next v4 00/25] Resilient Queued Spin Lock Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250316040541.108729-22-memxor@gmail.com \
--to=memxor@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=brho@google.com \
--cc=daniel@iogearbox.net \
--cc=dohyunkim@google.com \
--cc=eddyz87@gmail.com \
--cc=joshdon@google.com \
--cc=kernel-team@meta.com \
--cc=kkd@meta.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=llong@redhat.com \
--cc=martin.lau@kernel.org \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).