All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Harry Yoo (Oracle)" <harry@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>,
	Vlastimil Babka <vbabka@kernel.org>
Cc: Christoph Lameter <cl@gentwo.org>,
	David Rientjes <rientjes@google.com>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Hao Li <hao.li@linux.dev>, Alexei Starovoitov <ast@kernel.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Neeraj Upadhyay <neeraj.upadhyay@kernel.org>,
	Joel Fernandes <joelagnelf@nvidia.com>,
	Josh Triplett <josh@joshtriplett.org>,
	Boqun Feng <boqun@kernel.org>, Zqiang <qiang.zhang@linux.dev>,
	Steven Rostedt <rostedt@goodmis.org>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Lai Jiangshan <jiangshanlai@gmail.com>,
	rcu@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 7/8] mm/slab: introduce deferred submission of rcu sheaves
Date: Thu, 16 Apr 2026 18:10:21 +0900	[thread overview]
Message-ID: <20260416091022.36823-8-harry@kernel.org> (raw)
In-Reply-To: <20260416091022.36823-1-harry@kernel.org>

Instead of falling back when the rcu sheaf becomes full, implement
deferred submission of rcu sheaves. If kfree_rcu_sheaf() is invoked
by kfree_rcu_nolock() (!allow_spin) and IRQs are disabled, the CPU might
be in the middle of call_rcu(), so defer the call_rcu() invocation via
irq_work.

Submit all deferred RCU sheaves to call_rcu() before calling
rcu_barrier() to honor the guarantee of kvfree_rcu_barrier().

An alternative approach could be to implement this in the RCU subsystem,
tracking if it's safe to call call_rcu() and allowing falling back to
deferred call_rcu() at the cost of more expensive rcu_barrier() calls.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Harry Yoo (Oracle) <harry@kernel.org>
---
 mm/slab.h        |  2 ++
 mm/slab_common.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--
 mm/slub.c        | 12 ++++--------
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index bdad5f389490..9ba3aad1eeb2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -411,6 +411,8 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s)
 
 #ifdef CONFIG_KVFREE_RCU_BATCHED
 bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin);
+void rcu_free_sheaf(struct rcu_head *head);
+void submit_rcu_sheaf(struct rcu_head *head, bool allow_spin);
 void flush_all_rcu_sheaves(void);
 void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
 #endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 347e52f1538c..226009b10c4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1314,8 +1314,11 @@ struct kfree_rcu_cpu {
 	// Objects queued on a lockless linked list, used to free objects
 	// in unknown contexts when trylock fails.
 	struct llist_head defer_head;
-
 	struct irq_work defer_free;
+
+	struct llist_head defer_call_rcu_head;
+	struct irq_work defer_call_rcu;
+
 	struct irq_work sched_delayed_monitor;
 	struct irq_work run_page_cache_worker;
 
@@ -1345,11 +1348,14 @@ struct kfree_rcu_cpu {
 static void defer_kfree_rcu_irq_work_fn(struct irq_work *work);
 static void sched_delayed_monitor_irq_work_fn(struct irq_work *work);
 static void run_page_cache_worker_irq_work_fn(struct irq_work *work);
+static void defer_call_rcu_irq_work_fn(struct irq_work *work);
 
 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
 	.defer_head = LLIST_HEAD_INIT(defer_head),
 	.defer_free = IRQ_WORK_INIT(defer_kfree_rcu_irq_work_fn),
+	.defer_call_rcu_head = LLIST_HEAD_INIT(defer_call_rcu_head),
+	.defer_call_rcu = IRQ_WORK_INIT(defer_call_rcu_irq_work_fn),
 	.sched_delayed_monitor =
 		IRQ_WORK_INIT_LAZY(sched_delayed_monitor_irq_work_fn),
 	.run_page_cache_worker =
@@ -1374,8 +1380,12 @@ void defer_kvfree_rcu_barrier(void)
 {
 	int cpu;
 
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
 		irq_work_sync(&per_cpu_ptr(&krc, cpu)->defer_free);
+#ifdef CONFIG_KVFREE_RCU_BATCHED
+		irq_work_sync(&per_cpu_ptr(&krc, cpu)->defer_call_rcu);
+#endif
+	}
 }
 
 static void *object_start_addr(void *ptr)
@@ -1524,6 +1534,21 @@ static void sched_delayed_monitor_irq_work_fn(struct irq_work *work)
 	schedule_delayed_monitor_work(krcp);
 }
 
+static void defer_call_rcu_irq_work_fn(struct irq_work *work)
+{
+	struct kfree_rcu_cpu *krcp;
+	struct llist_node *llnode, *pos, *t;
+
+	krcp = container_of(work, struct kfree_rcu_cpu, defer_call_rcu);
+
+	if (llist_empty(&krcp->defer_call_rcu_head))
+		return;
+
+	llnode = llist_del_all(&krcp->defer_call_rcu_head);
+	llist_for_each_safe(pos, t, llnode)
+		call_rcu((struct rcu_head *)pos, rcu_free_sheaf);
+}
+
 static __always_inline void
 debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)
 {
@@ -2187,6 +2212,26 @@ void kvfree_call_rcu_ptr(struct rcu_ptr *head, void *ptr, bool allow_spin)
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu_ptr);
 
+static inline void defer_call_rcu(struct rcu_head *head)
+{
+	struct kfree_rcu_cpu *krcp;
+
+	VM_WARN_ON_ONCE(!irqs_disabled());
+
+	krcp = this_cpu_ptr(&krc);
+	if (llist_add((struct llist_node *)head, &krcp->defer_call_rcu_head))
+		irq_work_queue(&krcp->defer_call_rcu);
+}
+
+void submit_rcu_sheaf(struct rcu_head *head, bool allow_spin)
+{
+	/* Might be in the middle of call_rcu(), defer it */
+	if (unlikely(!allow_spin && irqs_disabled()))
+		defer_call_rcu(head);
+	else
+		call_rcu(head, rcu_free_sheaf);
+}
+
 static inline void __kvfree_rcu_barrier(void)
 {
 	struct kfree_rcu_cpu_work *krwp;
diff --git a/mm/slub.c b/mm/slub.c
index 91b8827d65da..1c3451166498 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4152,6 +4152,8 @@ static int slub_cpu_dead(unsigned int cpu)
 			__pcs_flush_all_cpu(s, cpu);
 	}
 	mutex_unlock(&slab_mutex);
+
+	/* pending IRQ work should have been flushed before going offline */
 	return 0;
 }
 
@@ -5847,7 +5849,7 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin)
 }
 
 #ifdef CONFIG_KVFREE_RCU_BATCHED
-static void rcu_free_sheaf(struct rcu_head *head)
+void rcu_free_sheaf(struct rcu_head *head)
 {
 	struct slab_sheaf *sheaf;
 	struct node_barn *barn = NULL;
@@ -5999,12 +6001,6 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
 	if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
 		rcu_sheaf = NULL;
 	} else {
-		if (unlikely(!allow_spin)) {
-			/* call_rcu() cannot be called in an unknown context */
-			rcu_sheaf->size--;
-			local_unlock(&s->cpu_sheaves->lock);
-			goto fail;
-		}
 		pcs->rcu_free = NULL;
 		rcu_sheaf->node = numa_node_id();
 	}
@@ -6014,7 +6010,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
 	 * flush_all_rcu_sheaves() doesn't miss this sheaf
 	 */
 	if (rcu_sheaf)
-		call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf);
+		submit_rcu_sheaf(&rcu_sheaf->rcu_head, allow_spin);
 
 	local_unlock(&s->cpu_sheaves->lock);
 
-- 
2.43.0



  parent reply	other threads:[~2026-04-16  9:10 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-16  9:10 [RFC PATCH v2 0/8] kvfree_rcu() improvements Harry Yoo (Oracle)
2026-04-16  9:10 ` [PATCH 1/8] mm/slab: introduce k[v]free_rcu() with struct rcu_ptr Harry Yoo (Oracle)
2026-04-22 14:41   ` Vlastimil Babka (SUSE)
2026-04-23  1:36     ` Harry Yoo (Oracle)
2026-04-16  9:10 ` [PATCH 2/8] fs/dcache: use rcu_ptr instead of rcu_head for external names Harry Yoo (Oracle)
2026-04-21 20:21   ` Al Viro
2026-04-22  1:16     ` Harry Yoo (Oracle)
2026-04-16  9:10 ` [PATCH 3/8] mm/slab: move kfree_rcu_cpu[_work] definitions Harry Yoo (Oracle)
2026-04-16  9:10 ` [PATCH 4/8] mm/slab: introduce kfree_rcu_nolock() Harry Yoo (Oracle)
2026-04-21 22:46   ` Alexei Starovoitov
2026-04-21 23:10     ` Paul E. McKenney
2026-04-21 23:14       ` Alexei Starovoitov
2026-04-22  3:02       ` Harry Yoo (Oracle)
2026-04-22 14:42   ` Uladzislau Rezki
2026-04-23  1:08     ` Harry Yoo (Oracle)
2026-04-23  1:56       ` Harry Yoo (Oracle)
2026-04-27 18:08         ` Vlastimil Babka (SUSE)
2026-04-27 18:51           ` Paul E. McKenney
2026-04-23  2:14       ` Harry Yoo (Oracle)
2026-04-23  4:23     ` Harry Yoo (Oracle)
2026-04-23 11:35       ` Uladzislau Rezki
2026-04-28 13:12         ` Harry Yoo (Oracle)
2026-04-30 12:10           ` Uladzislau Rezki
2026-04-27 13:08   ` Vlastimil Babka (SUSE)
2026-04-16  9:10 ` [PATCH 5/8] mm/slab: make kfree_rcu_nolock() work with sheaves Harry Yoo (Oracle)
2026-04-27 13:32   ` Vlastimil Babka (SUSE)
2026-04-27 13:53     ` Vlastimil Babka (SUSE)
2026-04-27 14:45       ` Alexei Starovoitov
2026-04-27 15:08         ` Vlastimil Babka (SUSE)
2026-04-27 15:11           ` Alexei Starovoitov
2026-04-16  9:10 ` [PATCH 6/8] mm/slab: wrap rcu sheaf handling with ifdef Harry Yoo (Oracle)
2026-04-27 15:47   ` Vlastimil Babka (SUSE)
2026-04-16  9:10 ` Harry Yoo (Oracle) [this message]
2026-04-21 22:51   ` [PATCH 7/8] mm/slab: introduce deferred submission of rcu sheaves Alexei Starovoitov
2026-04-22  3:11     ` Harry Yoo (Oracle)
2026-04-27 15:55   ` Vlastimil Babka (SUSE)
2026-04-16  9:10 ` [PATCH 8/8] lib/tests/slub_kunit: add a test case for kfree_rcu_nolock() Harry Yoo (Oracle)
2026-04-22 14:30 ` [RFC PATCH v2 0/8] kvfree_rcu() improvements Vlastimil Babka (SUSE)
2026-04-22 22:41   ` Paul E. McKenney
2026-04-23  1:31   ` Harry Yoo (Oracle)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260416091022.36823-8-harry@kernel.org \
    --to=harry@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=ast@kernel.org \
    --cc=boqun@kernel.org \
    --cc=cl@gentwo.org \
    --cc=frederic@kernel.org \
    --cc=hao.li@linux.dev \
    --cc=jiangshanlai@gmail.com \
    --cc=joelagnelf@nvidia.com \
    --cc=josh@joshtriplett.org \
    --cc=linux-mm@kvack.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=neeraj.upadhyay@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=qiang.zhang@linux.dev \
    --cc=rcu@vger.kernel.org \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=rostedt@goodmis.org \
    --cc=urezki@gmail.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.