public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
To: bpf@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Waiman Long <llong@redhat.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <martin.lau@kernel.org>,
	Eduard Zingerman <eddyz87@gmail.com>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Tejun Heo <tj@kernel.org>, Barret Rhoden <brho@google.com>,
	Josh Don <joshdon@google.com>, Dohyun Kim <dohyunkim@google.com>,
	kernel-team@meta.com
Subject: [PATCH bpf-next v1 19/22] bpf: Convert lpm_trie.c to rqspinlock
Date: Tue,  7 Jan 2025 06:00:01 -0800	[thread overview]
Message-ID: <20250107140004.2732830-20-memxor@gmail.com> (raw)
In-Reply-To: <20250107140004.2732830-1-memxor@gmail.com>

Convert all LPM trie usage of raw_spinlock to rqspinlock.

Note that rcu_dereference_protected in trie_update_elem and
trie_delete_elem is switched over to plain rcu_dereference: the RCU read
lock should be held from the BPF program side or the eBPF syscall path,
and the trie->lock is acquired just before the dereference. The reason
the protected variant was used is not clear from the commit history, but
the above reasoning makes sense, so switch over.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 kernel/bpf/lpm_trie.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index f8bc1e096182..a92d1eeafb33 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -15,6 +15,7 @@
 #include <net/ipv6.h>
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
+#include <asm/rqspinlock.h>
 #include <linux/bpf_mem_alloc.h>
 
 /* Intermediate node */
@@ -36,7 +37,7 @@ struct lpm_trie {
 	size_t				n_entries;
 	size_t				max_prefixlen;
 	size_t				data_size;
-	raw_spinlock_t			lock;
+	rqspinlock_t			lock;
 };
 
 /* This trie implements a longest prefix match algorithm that can be used to
@@ -349,7 +350,9 @@ static long trie_update_elem(struct bpf_map *map,
 	if (!new_node)
 		return -ENOMEM;
 
-	raw_spin_lock_irqsave(&trie->lock, irq_flags);
+	ret = raw_res_spin_lock_irqsave(&trie->lock, irq_flags);
+	if (ret)
+		goto out_free;
 
 	new_node->prefixlen = key->prefixlen;
 	RCU_INIT_POINTER(new_node->child[0], NULL);
@@ -363,8 +366,7 @@ static long trie_update_elem(struct bpf_map *map,
 	 */
 	slot = &trie->root;
 
-	while ((node = rcu_dereference_protected(*slot,
-					lockdep_is_held(&trie->lock)))) {
+	while ((node = rcu_dereference(*slot))) {
 		matchlen = longest_prefix_match(trie, node, key);
 
 		if (node->prefixlen != matchlen ||
@@ -450,8 +452,8 @@ static long trie_update_elem(struct bpf_map *map,
 	rcu_assign_pointer(*slot, im_node);
 
 out:
-	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
-
+	raw_res_spin_unlock_irqrestore(&trie->lock, irq_flags);
+out_free:
 	migrate_disable();
 	if (ret)
 		bpf_mem_cache_free(&trie->ma, new_node);
@@ -477,7 +479,9 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	if (key->prefixlen > trie->max_prefixlen)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&trie->lock, irq_flags);
+	ret = raw_res_spin_lock_irqsave(&trie->lock, irq_flags);
+	if (ret)
+		return ret;
 
 	/* Walk the tree looking for an exact key/length match and keeping
 	 * track of the path we traverse.  We will need to know the node
@@ -488,8 +492,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	trim = &trie->root;
 	trim2 = trim;
 	parent = NULL;
-	while ((node = rcu_dereference_protected(
-		       *trim, lockdep_is_held(&trie->lock)))) {
+	while ((node = rcu_dereference(*trim))) {
 		matchlen = longest_prefix_match(trie, node, key);
 
 		if (node->prefixlen != matchlen ||
@@ -553,7 +556,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
 	free_node = node;
 
 out:
-	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+	raw_res_spin_unlock_irqrestore(&trie->lock, irq_flags);
 
 	migrate_disable();
 	bpf_mem_cache_free_rcu(&trie->ma, free_parent);
@@ -604,7 +607,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 			  offsetof(struct bpf_lpm_trie_key_u8, data);
 	trie->max_prefixlen = trie->data_size * 8;
 
-	raw_spin_lock_init(&trie->lock);
+	raw_res_spin_lock_init(&trie->lock);
 
 	/* Allocate intermediate and leaf nodes from the same allocator */
 	leaf_size = sizeof(struct lpm_trie_node) + trie->data_size +
-- 
2.43.5


  parent reply	other threads:[~2025-01-07 14:00 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-07 13:59 [PATCH bpf-next v1 00/22] Resilient Queued Spin Lock Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 01/22] locking: Move MCS struct definition to public header Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 02/22] locking: Move common qspinlock helpers to a private header Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 03/22] locking: Allow obtaining result of arch_mcs_spin_lock_contended Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 04/22] locking: Copy out qspinlock.c to rqspinlock.c Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 05/22] rqspinlock: Add rqspinlock.h header Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 06/22] rqspinlock: Drop PV and virtualization support Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 07/22] rqspinlock: Add support for timeouts Kumar Kartikeya Dwivedi
2025-01-07 14:50   ` Peter Zijlstra
2025-01-07 17:14     ` Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 08/22] rqspinlock: Protect pending bit owners from stalls Kumar Kartikeya Dwivedi
2025-01-07 14:51   ` Peter Zijlstra
2025-01-07 17:14     ` Kumar Kartikeya Dwivedi
2025-01-07 19:17       ` Peter Zijlstra
2025-01-07 19:22         ` Peter Zijlstra
2025-01-07 19:54           ` Kumar Kartikeya Dwivedi
2025-01-08  2:19   ` Waiman Long
2025-01-08 20:13     ` Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 09/22] rqspinlock: Protect waiters in queue " Kumar Kartikeya Dwivedi
2025-01-08  3:38   ` Waiman Long
2025-01-08 20:42     ` Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 10/22] rqspinlock: Protect waiters in trylock fallback " Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 11/22] rqspinlock: Add deadlock detection and recovery Kumar Kartikeya Dwivedi
2025-01-08 16:06   ` Waiman Long
2025-01-08 20:19     ` Kumar Kartikeya Dwivedi
2025-01-09  0:32       ` Waiman Long
2025-01-07 13:59 ` [PATCH bpf-next v1 12/22] rqspinlock: Add basic support for CONFIG_PARAVIRT Kumar Kartikeya Dwivedi
2025-01-08 16:27   ` Waiman Long
2025-01-08 20:32     ` Kumar Kartikeya Dwivedi
2025-01-09  0:48       ` Waiman Long
2025-01-09  2:42         ` Alexei Starovoitov
2025-01-09  2:58           ` Waiman Long
2025-01-09  3:37             ` Alexei Starovoitov
2025-01-09  3:46               ` Waiman Long
2025-01-09  3:53                 ` Alexei Starovoitov
2025-01-09  3:58                   ` Waiman Long
2025-01-07 13:59 ` [PATCH bpf-next v1 13/22] rqspinlock: Add helper to print a splat on timeout or deadlock Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 14/22] rqspinlock: Add macros for rqspinlock usage Kumar Kartikeya Dwivedi
2025-01-08 16:55   ` Waiman Long
2025-01-08 20:41     ` Kumar Kartikeya Dwivedi
2025-01-09  1:11       ` Waiman Long
2025-01-09  3:30         ` Alexei Starovoitov
2025-01-09  4:09           ` Waiman Long
2025-01-07 13:59 ` [PATCH bpf-next v1 15/22] rqspinlock: Add locktorture support Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 16/22] rqspinlock: Add entry to Makefile, MAINTAINERS Kumar Kartikeya Dwivedi
2025-01-07 13:59 ` [PATCH bpf-next v1 17/22] bpf: Convert hashtab.c to rqspinlock Kumar Kartikeya Dwivedi
2025-01-07 14:00 ` [PATCH bpf-next v1 18/22] bpf: Convert percpu_freelist.c " Kumar Kartikeya Dwivedi
2025-01-07 14:00 ` Kumar Kartikeya Dwivedi [this message]
2025-01-07 14:00 ` [PATCH bpf-next v1 20/22] bpf: Introduce rqspinlock kfuncs Kumar Kartikeya Dwivedi
2025-01-08 10:23   ` kernel test robot
2025-01-08 10:23   ` kernel test robot
2025-01-08 10:44   ` kernel test robot
2025-01-07 14:00 ` [PATCH bpf-next v1 21/22] bpf: Implement verifier support for rqspinlock Kumar Kartikeya Dwivedi
2025-01-07 14:00 ` [PATCH bpf-next v1 22/22] selftests/bpf: Add tests " Kumar Kartikeya Dwivedi
2025-01-07 23:54 ` [PATCH bpf-next v1 00/22] Resilient Queued Spin Lock Linus Torvalds
2025-01-08  9:18   ` Peter Zijlstra
2025-01-08 20:12     ` Kumar Kartikeya Dwivedi
2025-01-08 20:30       ` Linus Torvalds
2025-01-08 21:06         ` Kumar Kartikeya Dwivedi
2025-01-08 21:30         ` Paul E. McKenney
2025-01-09 13:59       ` Waiman Long
2025-01-09 21:13         ` Kumar Kartikeya Dwivedi
2025-01-09 21:18           ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250107140004.2732830-20-memxor@gmail.com \
    --to=memxor@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brho@google.com \
    --cc=daniel@iogearbox.net \
    --cc=dohyunkim@google.com \
    --cc=eddyz87@gmail.com \
    --cc=joshdon@google.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=llong@redhat.com \
    --cc=martin.lau@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox