From: Marcelo Tosatti <mtosatti@redhat.com>
To: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
linux-mm@kvack.org
Cc: Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>,
Andrew Morton <akpm@linux-foundation.org>,
Christoph Lameter <cl@linux.com>,
Pekka Enberg <penberg@kernel.org>,
David Rientjes <rientjes@google.com>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Vlastimil Babka <vbabka@suse.cz>,
Hyeonggon Yoo <42.hyeyoo@gmail.com>,
Leonardo Bras <leobras@redhat.com>,
Thomas Gleixner <tglx@linutronix.de>,
Waiman Long <longman@redhat.com>,
Boqun Feng <boqun.feng@gmail.com>,
Marcelo Tosatti <mtosatti@redhat.com>
Subject: [PATCH 3/4] swap: apply new queue_percpu_work_on() interface
Date: Fri, 06 Feb 2026 11:34:33 -0300 [thread overview]
Message-ID: <20260206143741.589656953@redhat.com> (raw)
In-Reply-To: <20260206143430.021026873@redhat.com>
Make use of the new qpw_{un,}lock*() and queue_percpu_work_on()
interface to improve performance & latency on PREEMPT_RT kernels.
For functions that may be scheduled in a different cpu, replace
local_{un,}lock*() by qpw_{un,}lock*(), and replace schedule_work_on() by
queue_percpu_work_on(). Similarly, flush_work() is replaced by
flush_percpu_work().
The change requires allocating qpw_structs instead of work_structs,
and changing parameters of a few functions to include the cpu parameter.
This should bring no relevant performance impact on non-RT kernels:
For functions that may be scheduled in a different cpu, the local_*lock's
this_cpu_ptr() becomes a per_cpu_ptr(smp_processor_id()).
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
mm/internal.h | 4 +-
mm/mlock.c | 71 ++++++++++++++++++++++++++++++++------------
mm/page_alloc.c | 2 -
mm/swap.c | 90 +++++++++++++++++++++++++++++++-------------------------
4 files changed, 108 insertions(+), 59 deletions(-)
Index: slab/mm/mlock.c
===================================================================
--- slab.orig/mm/mlock.c
+++ slab/mm/mlock.c
@@ -25,17 +25,16 @@
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/secretmem.h>
+#include <linux/qpw.h>
#include "internal.h"
struct mlock_fbatch {
- local_lock_t lock;
+ qpw_lock_t lock;
struct folio_batch fbatch;
};
-static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = {
- .lock = INIT_LOCAL_LOCK(lock),
-};
+static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch);
bool can_do_mlock(void)
{
@@ -209,18 +208,25 @@ static void mlock_folio_batch(struct fol
folios_put(fbatch);
}
-void mlock_drain_local(void)
+void mlock_drain_cpu(int cpu)
{
struct folio_batch *fbatch;
- local_lock(&mlock_fbatch.lock);
- fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
+ qpw_lock(&mlock_fbatch.lock, cpu);
+ fbatch = per_cpu_ptr(&mlock_fbatch.fbatch, cpu);
if (folio_batch_count(fbatch))
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ qpw_unlock(&mlock_fbatch.lock, cpu);
}
-void mlock_drain_remote(int cpu)
+void mlock_drain_local(void)
+{
+ migrate_disable();
+ mlock_drain_cpu(smp_processor_id());
+ migrate_enable();
+}
+
+void mlock_drain_offline(int cpu)
{
struct folio_batch *fbatch;
@@ -242,9 +248,12 @@ bool need_mlock_drain(int cpu)
void mlock_folio(struct folio *folio)
{
struct folio_batch *fbatch;
+ int cpu;
- local_lock(&mlock_fbatch.lock);
- fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
+ migrate_disable();
+ cpu = smp_processor_id();
+ qpw_lock(&mlock_fbatch.lock, cpu);
+ fbatch = per_cpu_ptr(&mlock_fbatch.fbatch, cpu);
if (!folio_test_set_mlocked(folio)) {
int nr_pages = folio_nr_pages(folio);
@@ -257,7 +266,8 @@ void mlock_folio(struct folio *folio)
if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ qpw_unlock(&mlock_fbatch.lock, cpu);
+ migrate_enable();
}
/**
@@ -268,9 +278,13 @@ void mlock_new_folio(struct folio *folio
{
struct folio_batch *fbatch;
int nr_pages = folio_nr_pages(folio);
+ int cpu;
+
+ migrate_disable();
+ cpu = smp_processor_id();
+ qpw_lock(&mlock_fbatch.lock, cpu);
- local_lock(&mlock_fbatch.lock);
- fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
+ fbatch = per_cpu_ptr(&mlock_fbatch.fbatch, cpu);
folio_set_mlocked(folio);
zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
@@ -280,7 +294,8 @@ void mlock_new_folio(struct folio *folio
if (!folio_batch_add(fbatch, mlock_new(folio)) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+	qpw_unlock(&mlock_fbatch.lock, cpu);
+	migrate_enable();
}
/**
@@ -290,9 +305,13 @@ void mlock_new_folio(struct folio *folio
void munlock_folio(struct folio *folio)
{
struct folio_batch *fbatch;
+ int cpu;
- local_lock(&mlock_fbatch.lock);
- fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
+ migrate_disable();
+ cpu = smp_processor_id();
+ qpw_lock(&mlock_fbatch.lock, cpu);
+
+ fbatch = per_cpu_ptr(&mlock_fbatch.fbatch, cpu);
/*
* folio_test_clear_mlocked(folio) must be left to __munlock_folio(),
* which will check whether the folio is multiply mlocked.
@@ -301,7 +320,8 @@ void munlock_folio(struct folio *folio)
if (!folio_batch_add(fbatch, folio) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
- local_unlock(&mlock_fbatch.lock);
+ qpw_unlock(&mlock_fbatch.lock, cpu);
+ migrate_enable();
}
static inline unsigned int folio_mlock_step(struct folio *folio,
@@ -823,3 +843,18 @@ void user_shm_unlock(size_t size, struct
spin_unlock(&shmlock_user_lock);
put_ucounts(ucounts);
}
+
+int __init mlock_init(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct mlock_fbatch *fbatch = &per_cpu(mlock_fbatch, cpu);
+
+ qpw_lock_init(&fbatch->lock);
+ }
+
+ return 0;
+}
+
+module_init(mlock_init);
Index: slab/mm/swap.c
===================================================================
--- slab.orig/mm/swap.c
+++ slab/mm/swap.c
@@ -35,7 +35,7 @@
#include <linux/uio.h>
#include <linux/hugetlb.h>
#include <linux/page_idle.h>
-#include <linux/local_lock.h>
+#include <linux/qpw.h>
#include <linux/buffer_head.h>
#include "internal.h"
@@ -52,7 +52,7 @@ struct cpu_fbatches {
* The following folio batches are grouped together because they are protected
* by disabling preemption (and interrupts remain enabled).
*/
- local_lock_t lock;
+ qpw_lock_t lock;
struct folio_batch lru_add;
struct folio_batch lru_deactivate_file;
struct folio_batch lru_deactivate;
@@ -61,14 +61,11 @@ struct cpu_fbatches {
struct folio_batch lru_activate;
#endif
/* Protecting the following batches which require disabling interrupts */
- local_lock_t lock_irq;
+ qpw_lock_t lock_irq;
struct folio_batch lru_move_tail;
};
-static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = {
- .lock = INIT_LOCAL_LOCK(lock),
- .lock_irq = INIT_LOCAL_LOCK(lock_irq),
-};
+static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches);
static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp,
unsigned long *flagsp)
@@ -183,22 +180,24 @@ static void __folio_batch_add_and_move(s
struct folio *folio, move_fn_t move_fn, bool disable_irq)
{
unsigned long flags;
+ int cpu;
folio_get(folio);
+ cpu = smp_processor_id();
if (disable_irq)
- local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
+ qpw_lock_irqsave(&cpu_fbatches.lock_irq, flags, cpu);
else
- local_lock(&cpu_fbatches.lock);
+ qpw_lock(&cpu_fbatches.lock, cpu);
- if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
+ if (!folio_batch_add(per_cpu_ptr(fbatch, cpu), folio) ||
!folio_may_be_lru_cached(folio) || lru_cache_disabled())
- folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
+ folio_batch_move_lru(per_cpu_ptr(fbatch, cpu), move_fn);
if (disable_irq)
- local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
+ qpw_unlock_irqrestore(&cpu_fbatches.lock_irq, flags, cpu);
else
- local_unlock(&cpu_fbatches.lock);
+ qpw_unlock(&cpu_fbatches.lock, cpu);
}
#define folio_batch_add_and_move(folio, op) \
@@ -358,9 +357,10 @@ static void __lru_cache_activate_folio(s
{
struct folio_batch *fbatch;
int i;
+ int cpu = smp_processor_id();
- local_lock(&cpu_fbatches.lock);
- fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
+ qpw_lock(&cpu_fbatches.lock, cpu);
+ fbatch = per_cpu_ptr(&cpu_fbatches.lru_add, cpu);
/*
* Search backwards on the optimistic assumption that the folio being
@@ -381,7 +381,7 @@ static void __lru_cache_activate_folio(s
}
}
- local_unlock(&cpu_fbatches.lock);
+ qpw_unlock(&cpu_fbatches.lock, cpu);
}
#ifdef CONFIG_LRU_GEN
@@ -653,9 +653,9 @@ void lru_add_drain_cpu(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
+ qpw_lock_irqsave(&cpu_fbatches.lock_irq, flags, cpu);
folio_batch_move_lru(fbatch, lru_move_tail);
- local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
+ qpw_unlock_irqrestore(&cpu_fbatches.lock_irq, flags, cpu);
}
fbatch = &fbatches->lru_deactivate_file;
@@ -733,10 +733,12 @@ void folio_mark_lazyfree(struct folio *f
void lru_add_drain(void)
{
- local_lock(&cpu_fbatches.lock);
- lru_add_drain_cpu(smp_processor_id());
- local_unlock(&cpu_fbatches.lock);
- mlock_drain_local();
+ int cpu = smp_processor_id();
+
+ qpw_lock(&cpu_fbatches.lock, cpu);
+ lru_add_drain_cpu(cpu);
+ qpw_unlock(&cpu_fbatches.lock, cpu);
+ mlock_drain_cpu(cpu);
}
/*
@@ -745,30 +747,32 @@ void lru_add_drain(void)
* the same cpu. It shouldn't be a problem in !SMP case since
* the core is only one and the locks will disable preemption.
*/
-static void lru_add_mm_drain(void)
+static void lru_add_mm_drain(int cpu)
{
- local_lock(&cpu_fbatches.lock);
- lru_add_drain_cpu(smp_processor_id());
- local_unlock(&cpu_fbatches.lock);
- mlock_drain_local();
+ qpw_lock(&cpu_fbatches.lock, cpu);
+ lru_add_drain_cpu(cpu);
+ qpw_unlock(&cpu_fbatches.lock, cpu);
+ mlock_drain_cpu(cpu);
}
void lru_add_drain_cpu_zone(struct zone *zone)
{
- local_lock(&cpu_fbatches.lock);
- lru_add_drain_cpu(smp_processor_id());
+ int cpu = smp_processor_id();
+
+ qpw_lock(&cpu_fbatches.lock, cpu);
+ lru_add_drain_cpu(cpu);
drain_local_pages(zone);
- local_unlock(&cpu_fbatches.lock);
- mlock_drain_local();
+ qpw_unlock(&cpu_fbatches.lock, cpu);
+ mlock_drain_cpu(cpu);
}
#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+static DEFINE_PER_CPU(struct qpw_struct, lru_add_drain_qpw);
-static void lru_add_drain_per_cpu(struct work_struct *dummy)
+static void lru_add_drain_per_cpu(struct work_struct *w)
{
- lru_add_mm_drain();
+ lru_add_mm_drain(qpw_get_cpu(w));
}
static DEFINE_PER_CPU(struct work_struct, bh_add_drain_work);
@@ -883,12 +887,12 @@ static inline void __lru_add_drain_all(b
cpumask_clear(&has_mm_work);
cpumask_clear(&has_bh_work);
for_each_online_cpu(cpu) {
- struct work_struct *mm_work = &per_cpu(lru_add_drain_work, cpu);
+ struct qpw_struct *mm_qpw = &per_cpu(lru_add_drain_qpw, cpu);
struct work_struct *bh_work = &per_cpu(bh_add_drain_work, cpu);
if (cpu_needs_mm_drain(cpu)) {
- INIT_WORK(mm_work, lru_add_drain_per_cpu);
- queue_work_on(cpu, mm_percpu_wq, mm_work);
+ INIT_QPW(mm_qpw, lru_add_drain_per_cpu, cpu);
+ queue_percpu_work_on(cpu, mm_percpu_wq, mm_qpw);
__cpumask_set_cpu(cpu, &has_mm_work);
}
@@ -900,7 +904,7 @@ static inline void __lru_add_drain_all(b
}
for_each_cpu(cpu, &has_mm_work)
- flush_work(&per_cpu(lru_add_drain_work, cpu));
+ flush_percpu_work(&per_cpu(lru_add_drain_qpw, cpu));
for_each_cpu(cpu, &has_bh_work)
flush_work(&per_cpu(bh_add_drain_work, cpu));
@@ -950,7 +954,7 @@ void lru_cache_disable(void)
#ifdef CONFIG_SMP
__lru_add_drain_all(true);
#else
- lru_add_mm_drain();
+ lru_add_mm_drain(smp_processor_id());
invalidate_bh_lrus_cpu();
#endif
}
@@ -1124,6 +1128,7 @@ static const struct ctl_table swap_sysct
void __init swap_setup(void)
{
unsigned long megs = PAGES_TO_MB(totalram_pages());
+ unsigned int cpu;
/* Use a smaller cluster for small-memory machines */
if (megs < 16)
@@ -1136,4 +1141,11 @@ void __init swap_setup(void)
*/
register_sysctl_init("vm", swap_sysctl_table);
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);
+
+ qpw_lock_init(&fbatches->lock);
+ qpw_lock_init(&fbatches->lock_irq);
+ }
}
Index: slab/mm/internal.h
===================================================================
--- slab.orig/mm/internal.h
+++ slab/mm/internal.h
@@ -1061,10 +1061,12 @@ static inline void munlock_vma_folio(str
munlock_folio(folio);
}
+int __init mlock_init(void);
void mlock_new_folio(struct folio *folio);
bool need_mlock_drain(int cpu);
void mlock_drain_local(void);
-void mlock_drain_remote(int cpu);
+void mlock_drain_cpu(int cpu);
+void mlock_drain_offline(int cpu);
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
Index: slab/mm/page_alloc.c
===================================================================
--- slab.orig/mm/page_alloc.c
+++ slab/mm/page_alloc.c
@@ -6251,7 +6251,7 @@ static int page_alloc_cpu_dead(unsigned
struct zone *zone;
lru_add_drain_cpu(cpu);
- mlock_drain_remote(cpu);
+ mlock_drain_offline(cpu);
drain_pages(cpu);
/*
next prev parent reply other threads:[~2026-02-06 14:40 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-06 14:34 [PATCH 0/4] Introduce QPW for per-cpu operations Marcelo Tosatti
2026-02-06 14:34 ` [PATCH 1/4] Introducing qpw_lock() and per-cpu queue & flush work Marcelo Tosatti
2026-02-06 15:20 ` Marcelo Tosatti
2026-02-07 0:16 ` Leonardo Bras
2026-02-11 12:09 ` Marcelo Tosatti
2026-02-14 21:32 ` Leonardo Bras
2026-02-06 14:34 ` [PATCH 2/4] mm/swap: move bh draining into a separate workqueue Marcelo Tosatti
2026-02-06 14:34 ` Marcelo Tosatti [this message]
2026-02-07 1:06 ` [PATCH 3/4] swap: apply new queue_percpu_work_on() interface Leonardo Bras
2026-02-26 15:49 ` Marcelo Tosatti
2026-03-08 17:35 ` Leonardo Bras
2026-02-06 14:34 ` [PATCH 4/4] slub: " Marcelo Tosatti
2026-02-07 1:27 ` Leonardo Bras
2026-02-06 23:56 ` [PATCH 0/4] Introduce QPW for per-cpu operations Leonardo Bras
2026-02-10 14:01 ` Michal Hocko
2026-02-11 12:01 ` Marcelo Tosatti
2026-02-11 12:11 ` Marcelo Tosatti
2026-02-14 21:35 ` Leonardo Bras
2026-02-11 16:38 ` Michal Hocko
2026-02-11 16:50 ` Marcelo Tosatti
2026-02-11 16:59 ` Vlastimil Babka
2026-02-11 17:07 ` Michal Hocko
2026-02-14 22:02 ` Leonardo Bras
2026-02-16 11:00 ` Michal Hocko
2026-02-19 15:27 ` Marcelo Tosatti
2026-02-19 19:30 ` Michal Hocko
2026-02-20 14:30 ` Marcelo Tosatti
2026-02-23 9:18 ` Michal Hocko
2026-03-03 10:55 ` Frederic Weisbecker
2026-02-23 21:56 ` Frederic Weisbecker
2026-02-24 17:23 ` Marcelo Tosatti
2026-02-25 21:49 ` Frederic Weisbecker
2026-02-26 7:06 ` Michal Hocko
2026-02-26 11:41 ` Marcelo Tosatti
2026-03-03 11:08 ` Frederic Weisbecker
2026-02-20 10:48 ` Vlastimil Babka
2026-02-20 12:31 ` Michal Hocko
2026-02-20 17:35 ` Marcelo Tosatti
2026-02-20 17:58 ` Vlastimil Babka
2026-02-20 19:01 ` Marcelo Tosatti
2026-02-23 9:11 ` Michal Hocko
2026-02-23 11:20 ` Marcelo Tosatti
2026-02-24 14:40 ` Frederic Weisbecker
2026-02-24 18:12 ` Marcelo Tosatti
2026-02-20 16:51 ` Marcelo Tosatti
2026-02-20 16:55 ` Marcelo Tosatti
2026-02-20 22:38 ` Leonardo Bras
2026-02-23 18:09 ` Vlastimil Babka
2026-02-26 18:24 ` Marcelo Tosatti
2026-02-20 21:58 ` Leonardo Bras
2026-02-23 9:06 ` Michal Hocko
2026-02-28 1:23 ` Leonardo Bras
2026-03-03 0:19 ` Marcelo Tosatti
2026-03-08 17:41 ` Leonardo Bras
2026-03-09 9:52 ` Vlastimil Babka (SUSE)
2026-03-11 0:01 ` Leonardo Bras
2026-03-10 21:24 ` Marcelo Tosatti
2026-03-11 0:03 ` Leonardo Bras
2026-03-11 10:23 ` Marcelo Tosatti
2026-02-19 13:15 ` Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260206143741.589656953@redhat.com \
--to=mtosatti@redhat.com \
--cc=42.hyeyoo@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=boqun.feng@gmail.com \
--cc=cgroups@vger.kernel.org \
--cc=cl@linux.com \
--cc=hannes@cmpxchg.org \
--cc=iamjoonsoo.kim@lge.com \
--cc=leobras@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=longman@redhat.com \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=penberg@kernel.org \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=tglx@linutronix.de \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.