public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
To: bpf@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Will Deacon <will@kernel.org>, Waiman Long <llong@redhat.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <martin.lau@kernel.org>,
	Eduard Zingerman <eddyz87@gmail.com>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Tejun Heo <tj@kernel.org>, Barret Rhoden <brho@google.com>,
	Josh Don <joshdon@google.com>, Dohyun Kim <dohyunkim@google.com>,
	linux-arm-kernel@lists.infradead.org, kkd@meta.com,
	kernel-team@meta.com
Subject: [PATCH bpf-next v3 20/25] bpf: Convert percpu_freelist.c to rqspinlock
Date: Mon,  3 Mar 2025 07:23:00 -0800	[thread overview]
Message-ID: <20250303152305.3195648-21-memxor@gmail.com> (raw)
In-Reply-To: <20250303152305.3195648-1-memxor@gmail.com>

Convert the percpu_freelist.c code to use rqspinlock, and remove the
extralist fallback and trylock-based acquisitions to avoid deadlocks.

Key thing to note is the retained while (true) loop to search through
other CPUs when failing to push a node due to locking errors. This
retains the behavior of the old code, where it would keep trying until
it would be able to successfully push the node back into the freelist of
a CPU.

Technically, we should start iteration for this loop from
raw_smp_processor_id() + 1, but to avoid hitting the edge of nr_cpus,
we skip execution in the loop body instead.

Closes: https://lore.kernel.org/bpf/CAPPBnEa1_pZ6W24+WwtcNFvTUHTHO7KUmzEbOcMqxp+m2o15qQ@mail.gmail.com
Closes: https://lore.kernel.org/bpf/CAPPBnEYm+9zduStsZaDnq93q1jPLqO-PiKX9jy0MuL8LCXmCrQ@mail.gmail.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 kernel/bpf/percpu_freelist.c | 113 ++++++++---------------------------
 kernel/bpf/percpu_freelist.h |   4 +-
 2 files changed, 27 insertions(+), 90 deletions(-)

diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 034cf87b54e9..632762b57299 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -14,11 +14,9 @@ int pcpu_freelist_init(struct pcpu_freelist *s)
 	for_each_possible_cpu(cpu) {
 		struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu);
 
-		raw_spin_lock_init(&head->lock);
+		raw_res_spin_lock_init(&head->lock);
 		head->first = NULL;
 	}
-	raw_spin_lock_init(&s->extralist.lock);
-	s->extralist.first = NULL;
 	return 0;
 }
 
@@ -34,58 +32,39 @@ static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head,
 	WRITE_ONCE(head->first, node);
 }
 
-static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
+static inline bool ___pcpu_freelist_push(struct pcpu_freelist_head *head,
 					 struct pcpu_freelist_node *node)
 {
-	raw_spin_lock(&head->lock);
-	pcpu_freelist_push_node(head, node);
-	raw_spin_unlock(&head->lock);
-}
-
-static inline bool pcpu_freelist_try_push_extra(struct pcpu_freelist *s,
-						struct pcpu_freelist_node *node)
-{
-	if (!raw_spin_trylock(&s->extralist.lock))
+	if (raw_res_spin_lock(&head->lock))
 		return false;
-
-	pcpu_freelist_push_node(&s->extralist, node);
-	raw_spin_unlock(&s->extralist.lock);
+	pcpu_freelist_push_node(head, node);
+	raw_res_spin_unlock(&head->lock);
 	return true;
 }
 
-static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
-					     struct pcpu_freelist_node *node)
+void __pcpu_freelist_push(struct pcpu_freelist *s,
+			struct pcpu_freelist_node *node)
 {
-	int cpu, orig_cpu;
+	struct pcpu_freelist_head *head;
+	int cpu;
 
-	orig_cpu = raw_smp_processor_id();
-	while (1) {
-		for_each_cpu_wrap(cpu, cpu_possible_mask, orig_cpu) {
-			struct pcpu_freelist_head *head;
+	if (___pcpu_freelist_push(this_cpu_ptr(s->freelist), node))
+		return;
 
+	while (true) {
+		for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
+			if (cpu == raw_smp_processor_id())
+				continue;
 			head = per_cpu_ptr(s->freelist, cpu);
-			if (raw_spin_trylock(&head->lock)) {
-				pcpu_freelist_push_node(head, node);
-				raw_spin_unlock(&head->lock);
-				return;
-			}
-		}
-
-		/* cannot lock any per cpu lock, try extralist */
-		if (pcpu_freelist_try_push_extra(s, node))
+			if (raw_res_spin_lock(&head->lock))
+				continue;
+			pcpu_freelist_push_node(head, node);
+			raw_res_spin_unlock(&head->lock);
 			return;
+		}
 	}
 }
 
-void __pcpu_freelist_push(struct pcpu_freelist *s,
-			struct pcpu_freelist_node *node)
-{
-	if (in_nmi())
-		___pcpu_freelist_push_nmi(s, node);
-	else
-		___pcpu_freelist_push(this_cpu_ptr(s->freelist), node);
-}
-
 void pcpu_freelist_push(struct pcpu_freelist *s,
 			struct pcpu_freelist_node *node)
 {
@@ -120,71 +99,29 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
 
 static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
 {
+	struct pcpu_freelist_node *node = NULL;
 	struct pcpu_freelist_head *head;
-	struct pcpu_freelist_node *node;
 	int cpu;
 
 	for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
 		head = per_cpu_ptr(s->freelist, cpu);
 		if (!READ_ONCE(head->first))
 			continue;
-		raw_spin_lock(&head->lock);
+		if (raw_res_spin_lock(&head->lock))
+			continue;
 		node = head->first;
 		if (node) {
 			WRITE_ONCE(head->first, node->next);
-			raw_spin_unlock(&head->lock);
+			raw_res_spin_unlock(&head->lock);
 			return node;
 		}
-		raw_spin_unlock(&head->lock);
+		raw_res_spin_unlock(&head->lock);
 	}
-
-	/* per cpu lists are all empty, try extralist */
-	if (!READ_ONCE(s->extralist.first))
-		return NULL;
-	raw_spin_lock(&s->extralist.lock);
-	node = s->extralist.first;
-	if (node)
-		WRITE_ONCE(s->extralist.first, node->next);
-	raw_spin_unlock(&s->extralist.lock);
-	return node;
-}
-
-static struct pcpu_freelist_node *
-___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
-{
-	struct pcpu_freelist_head *head;
-	struct pcpu_freelist_node *node;
-	int cpu;
-
-	for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
-		head = per_cpu_ptr(s->freelist, cpu);
-		if (!READ_ONCE(head->first))
-			continue;
-		if (raw_spin_trylock(&head->lock)) {
-			node = head->first;
-			if (node) {
-				WRITE_ONCE(head->first, node->next);
-				raw_spin_unlock(&head->lock);
-				return node;
-			}
-			raw_spin_unlock(&head->lock);
-		}
-	}
-
-	/* cannot pop from per cpu lists, try extralist */
-	if (!READ_ONCE(s->extralist.first) || !raw_spin_trylock(&s->extralist.lock))
-		return NULL;
-	node = s->extralist.first;
-	if (node)
-		WRITE_ONCE(s->extralist.first, node->next);
-	raw_spin_unlock(&s->extralist.lock);
 	return node;
 }
 
 struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
 {
-	if (in_nmi())
-		return ___pcpu_freelist_pop_nmi(s);
 	return ___pcpu_freelist_pop(s);
 }
 
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index 3c76553cfe57..914798b74967 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -5,15 +5,15 @@
 #define __PERCPU_FREELIST_H__
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
+#include <asm/rqspinlock.h>
 
 struct pcpu_freelist_head {
 	struct pcpu_freelist_node *first;
-	raw_spinlock_t lock;
+	rqspinlock_t lock;
 };
 
 struct pcpu_freelist {
 	struct pcpu_freelist_head __percpu *freelist;
-	struct pcpu_freelist_head extralist;
 };
 
 struct pcpu_freelist_node {
-- 
2.43.5


  parent reply	other threads:[~2025-03-03 15:23 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-03 15:22 [PATCH bpf-next v3 00/25] Resilient Queued Spin Lock Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 01/25] locking: Move MCS struct definition to public header Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 02/25] locking: Move common qspinlock helpers to a private header Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 03/25] locking: Allow obtaining result of arch_mcs_spin_lock_contended Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 04/25] locking: Copy out qspinlock.c to rqspinlock.c Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 05/25] rqspinlock: Add rqspinlock.h header Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 06/25] rqspinlock: Drop PV and virtualization support Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 07/25] rqspinlock: Add support for timeouts Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 08/25] rqspinlock: Hardcode cond_acquire loops for arm64 Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 09/25] rqspinlock: Protect pending bit owners from stalls Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 10/25] rqspinlock: Protect waiters in queue " Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 11/25] rqspinlock: Protect waiters in trylock fallback " Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 12/25] rqspinlock: Add deadlock detection and recovery Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 13/25] rqspinlock: Add a test-and-set fallback Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 14/25] rqspinlock: Add basic support for CONFIG_PARAVIRT Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 15/25] rqspinlock: Add helper to print a splat on timeout or deadlock Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 16/25] rqspinlock: Add macros for rqspinlock usage Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 17/25] rqspinlock: Add locktorture support Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 18/25] rqspinlock: Add entry to Makefile, MAINTAINERS Kumar Kartikeya Dwivedi
2025-03-03 15:22 ` [PATCH bpf-next v3 19/25] bpf: Convert hashtab.c to rqspinlock Kumar Kartikeya Dwivedi
2025-03-03 15:23 ` Kumar Kartikeya Dwivedi [this message]
2025-03-03 15:23 ` [PATCH bpf-next v3 21/25] bpf: Convert lpm_trie.c " Kumar Kartikeya Dwivedi
2025-03-03 15:23 ` [PATCH bpf-next v3 22/25] bpf: Introduce rqspinlock kfuncs Kumar Kartikeya Dwivedi
2025-03-03 15:23 ` [PATCH bpf-next v3 23/25] bpf: Implement verifier support for rqspinlock Kumar Kartikeya Dwivedi
2025-03-03 15:23 ` [PATCH bpf-next v3 24/25] bpf: Maintain FIFO property for rqspinlock unlock Kumar Kartikeya Dwivedi
2025-03-03 15:23 ` [PATCH bpf-next v3 25/25] selftests/bpf: Add tests for rqspinlock Kumar Kartikeya Dwivedi
2025-03-14 23:49 ` [PATCH bpf-next v3 00/25] Resilient Queued Spin Lock Kumar Kartikeya Dwivedi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250303152305.3195648-21-memxor@gmail.com \
    --to=memxor@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brho@google.com \
    --cc=daniel@iogearbox.net \
    --cc=dohyunkim@google.com \
    --cc=eddyz87@gmail.com \
    --cc=joshdon@google.com \
    --cc=kernel-team@meta.com \
    --cc=kkd@meta.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=llong@redhat.com \
    --cc=martin.lau@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox