public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: Boqun Feng <boqun@kernel.org>
To: Joel Fernandes <joelagnelf@nvidia.com>,
	"Paul E. McKenney" <paulmck@kernel.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	frederic@kernel.org, neeraj.iitr10@gmail.com, urezki@gmail.com,
	boqun.feng@gmail.com, rcu@vger.kernel.org,
	Tejun Heo <tj@kernel.org>,
	bpf@vger.kernel.org, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	John Fastabend <john.fastabend@gmail.com>,
	Boqun Feng <boqun@kernel.org>, Andrea Righi <arighi@nvidia.com>,
	Zqiang <qiang.zhang@linux.dev>
Subject: [PATCH v2] rcu: Use an intermediate irq_work to start process_srcu()
Date: Fri, 20 Mar 2026 15:29:16 -0700	[thread overview]
Message-ID: <20260320222916.19987-1-boqun@kernel.org> (raw)
In-Reply-To: <ab2yd35rm6OgZUmb@gpd4>

Since commit c27cea4416a3 ("rcu: Re-implement RCU Tasks Trace in terms
of SRCU-fast"), we switched to SRCU in BPF. However, because BPF
instrumentation can happen basically anywhere (including where a
scheduler lock is held), call_srcu() now needs to avoid acquiring
scheduler locks; otherwise it could cause a deadlock [1]. Fix this by
following what the previous RCU Tasks Trace implementation did: use an
irq_work to delay the queuing of the work that starts process_srcu().

[boqun: Apply Joel's feedback]
[boqun: Apply Andrea's test feedback]

Reported-by: Andrea Righi <arighi@nvidia.com>
Closes: https://lore.kernel.org/all/abjzvz_tL_siV17s@gpd4/
Fixes: c27cea4416a3 ("rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast")
Link: https://lore.kernel.org/rcu/3c4c5a29-24ea-492d-aeee-e0d9605b4183@nvidia.com/ [1]
Suggested-by: Zqiang <qiang.zhang@linux.dev>
Tested-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Boqun Feng <boqun@kernel.org>
---
 include/linux/srcutree.h |  1 +
 kernel/rcu/srcutree.c    | 30 ++++++++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index dfb31d11ff05..be76fa4fc170 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -95,6 +95,7 @@ struct srcu_usage {
 	unsigned long reschedule_jiffies;
 	unsigned long reschedule_count;
 	struct delayed_work work;
+	struct irq_work irq_work;
 	struct srcu_struct *srcu_ssp;
 };
 
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 2328827f8775..e08aaacad695 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -19,6 +19,7 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
+#include <linux/irq_work.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
@@ -75,6 +76,7 @@ static bool __read_mostly srcu_init_done;
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
+static void srcu_irq_work(struct irq_work *work);
 static void srcu_delay_timer(struct timer_list *t);
 
 /*
@@ -216,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 	mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
 	atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
+	init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work);
 	ssp->srcu_sup->sda_is_static = is_static;
 	if (!is_static) {
 		ssp->sda = alloc_percpu(struct srcu_data);
@@ -713,6 +716,8 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
 		return; /* Just leak it! */
 	if (WARN_ON(srcu_readers_active(ssp)))
 		return; /* Just leak it! */
+	/* Wait for irq_work to finish first, as it may queue new work. */
+	irq_work_sync(&sup->irq_work);
 	flush_delayed_work(&sup->work);
 	for_each_possible_cpu(cpu) {
 		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
@@ -1118,9 +1123,13 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 		// it isn't.  And it does not have to be.  After all, it
 		// can only be executed during early boot when there is only
 		// the one boot CPU running with interrupts still disabled.
+		//
+		// Use an irq_work here to avoid acquiring the runqueue lock
+		// with the srcu rcu_node::lock held. BPF instrumentation could
+		// introduce the opposite dependency, hence we need to break
+		// the possible locking dependency here.
 		if (likely(srcu_init_done))
-			queue_delayed_work(rcu_gp_wq, &sup->work,
-					   !!srcu_get_delay(ssp));
+			irq_work_queue(&sup->irq_work);
 		else if (list_empty(&sup->work.work.entry))
 			list_add(&sup->work.work.entry, &srcu_boot_list);
 	}
@@ -1979,6 +1988,23 @@ static void process_srcu(struct work_struct *work)
 	srcu_reschedule(ssp, curdelay);
 }
 
+static void srcu_irq_work(struct irq_work *work)
+{
+	struct srcu_struct *ssp;
+	struct srcu_usage *sup;
+	unsigned long delay;
+	unsigned long flags;
+
+	sup = container_of(work, struct srcu_usage, irq_work);
+	ssp = sup->srcu_ssp;
+
+	raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+	delay = srcu_get_delay(ssp);
+	raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+
+	queue_delayed_work(rcu_gp_wq, &sup->work, !!delay);
+}
+
 void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
 			     unsigned long *gp_seq)
 {
-- 
2.50.1 (Apple Git-155)


  parent reply	other threads:[~2026-03-20 22:29 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <3c4c5a29-24ea-492d-aeee-e0d9605b4183@nvidia.com>
     [not found] ` <absesPf-10awctq9@tardis.local>
     [not found]   ` <absfZGD3en9TUA1U@tardis.local>
     [not found]     ` <absj6rZbyLZY8PPz@tardis.local>
     [not found]       ` <da4313b1-9ff5-46e9-b3b7-206dbcb72602@nvidia.com>
     [not found]         ` <abs026TUV9oAg_Xy@tardis.local>
     [not found]           ` <abtMhd_LVp3uL_pA@tardis.local>
     [not found]             ` <20260319090315.Ec_eXAg4@linutronix.de>
     [not found]               ` <abwkD0mOdAbD9ENJ@tardis.local>
     [not found]                 ` <20260319163350.c7WuYOM9@linutronix.de>
2026-03-19 16:48                   ` Next-level bug in SRCU implementation of RCU Tasks Trace + PREEMPT_RT Boqun Feng
2026-03-19 16:59                     ` Kumar Kartikeya Dwivedi
2026-03-19 17:27                       ` Boqun Feng
2026-03-19 18:41                         ` Kumar Kartikeya Dwivedi
2026-03-19 20:14                           ` Boqun Feng
2026-03-19 20:21                             ` Joel Fernandes
2026-03-19 20:39                               ` Boqun Feng
2026-03-20 15:34                                 ` Paul E. McKenney
2026-03-20 15:59                                   ` Boqun Feng
2026-03-20 16:24                                     ` Paul E. McKenney
2026-03-20 16:57                                       ` Boqun Feng
2026-03-20 17:54                                         ` Joel Fernandes
2026-03-20 18:14                                           ` [PATCH] rcu: Use an intermediate irq_work to start process_srcu() Boqun Feng
2026-03-20 19:18                                             ` Joel Fernandes
2026-03-20 20:47                                             ` Andrea Righi
2026-03-20 20:54                                               ` Boqun Feng
2026-03-20 21:00                                                 ` Andrea Righi
2026-03-20 21:02                                                   ` Andrea Righi
2026-03-20 21:06                                                     ` Boqun Feng
2026-03-20 22:29                                               ` Boqun Feng [this message]
2026-03-23 21:09                                                 ` [PATCH v2] " Joel Fernandes
2026-03-23 22:18                                                   ` Boqun Feng
2026-03-23 22:50                                                     ` Joel Fernandes
2026-03-24 11:27                                                 ` Frederic Weisbecker
2026-03-24 14:56                                                   ` Joel Fernandes
2026-03-24 14:56                                                   ` Alexei Starovoitov
2026-03-24 17:36                                                     ` Boqun Feng
2026-03-24 18:40                                                       ` Joel Fernandes
2026-03-24 19:23                                                       ` Paul E. McKenney
2026-03-26 19:12                                                 ` patchwork-bot+netdevbpf
2026-03-21  4:27                                             ` [PATCH] " Zqiang
2026-03-21 18:15                                               ` Boqun Feng
2026-03-21 10:10                                             ` Paul E. McKenney
2026-03-21 17:15                                               ` Boqun Feng
2026-03-21 17:41                                                 ` Paul E. McKenney
2026-03-21 18:06                                                   ` Boqun Feng
2026-03-21 19:31                                                     ` Paul E. McKenney
2026-03-21 19:45                                                       ` Boqun Feng
2026-03-21 20:07                                                         ` Paul E. McKenney
2026-03-21 20:08                                                           ` Boqun Feng
2026-03-22 10:09                                                             ` Paul E. McKenney
2026-03-22 16:16                                                               ` Boqun Feng
2026-03-22 17:09                                                                 ` Paul E. McKenney
2026-03-22 17:31                                                                   ` Boqun Feng
2026-03-22 17:44                                                                     ` Paul E. McKenney
2026-03-22 18:17                                                                       ` Boqun Feng
2026-03-22 19:47                                                                         ` Paul E. McKenney
2026-03-22 20:26                                                                           ` Boqun Feng
2026-03-23  7:50                                                                             ` Paul E. McKenney
2026-03-20 18:20                                           ` Next-level bug in SRCU implementation of RCU Tasks Trace + PREEMPT_RT Boqun Feng
2026-03-20 23:11                                         ` Paul E. McKenney
2026-03-21  3:29                                           ` Paul E. McKenney
2026-03-21 17:03                                       ` [RFC PATCH] rcu-tasks: Avoid using mod_timer() in call_rcu_tasks_generic() Boqun Feng
2026-03-23 15:17                                         ` Boqun Feng
2026-03-23 20:37                                           ` Joel Fernandes
2026-03-23 21:50                                           ` Kumar Kartikeya Dwivedi
2026-03-23 22:13                                             ` Boqun Feng
2026-03-20 16:15                             ` Next-level bug in SRCU implementation of RCU Tasks Trace + PREEMPT_RT Boqun Feng
2026-03-20 16:24                               ` Paul E. McKenney
2026-03-19 17:02                     ` Sebastian Andrzej Siewior
2026-03-19 17:44                       ` Boqun Feng
2026-03-19 18:42                         ` Joel Fernandes
2026-03-19 20:20                           ` Boqun Feng
2026-03-19 20:26                             ` Joel Fernandes
2026-03-19 20:45                               ` Joel Fernandes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260320222916.19987-1-boqun@kernel.org \
    --to=boqun@kernel.org \
    --cc=arighi@nvidia.com \
    --cc=ast@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=boqun.feng@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=frederic@kernel.org \
    --cc=joelagnelf@nvidia.com \
    --cc=john.fastabend@gmail.com \
    --cc=memxor@gmail.com \
    --cc=neeraj.iitr10@gmail.com \
    --cc=paulmck@kernel.org \
    --cc=qiang.zhang@linux.dev \
    --cc=rcu@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=urezki@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.