* [PATCH v2 1/5] mm/filemap: Retry fault by VMA lock if the lock was released for I/O
From: Barry Song (Xiaomi) @ 2026-04-30 4:04 UTC
To: akpm, linux-mm, willy
Cc: david, ljs, liam, vbabka, rppt, surenb, mhocko, jack, pfalcato,
wanglian, chentao, lianux.mm, kunwu.chan, liyangouwen1, chrisl,
kasong, shikemeng, nphamcs, bhe, youngjun.park, linux-arm-kernel,
linux-kernel, loongarch, linuxppc-dev, linux-riscv, linux-s390,
Barry Song
From: Oven Liyang <liyangouwen1@oppo.com>
If the current page fault is using the per-VMA lock, and we released
the lock only to wait for I/O completion (e.g., in folio_lock()), then
when the fault is retried after the I/O completes, it should still
qualify for the per-VMA-lock path.
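
The per-architecture change is the same in every handler. Condensed
from the arm64 hunk below (error handling and accounting elided), the
pattern each fault path adopts is roughly:

retry_vma:
	vma = lock_vma_under_rcu(mm, addr);
	if (!vma)
		goto lock_mmap;
	fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
	...
	/* The per-VMA lock was dropped only to wait for I/O, so the
	 * retry may take the per-VMA path again instead of mmap_lock. */
	if (fault & VM_FAULT_RETRY_VMA)
		goto retry_vma;
lock_mmap:
	/* fall back to the mmap_lock slow path */
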
Acked-by: Pedro Falcato <pfalcato@suse.de>
Tested-by: Wang Lian <wanglian@kylinos.cn>
Tested-by: Kunwu Chan <chentao@kylinos.cn>
Reviewed-by: Wang Lian <lianux.mm@gmail.com>
Reviewed-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Oven Liyang <liyangouwen1@oppo.com>
Co-developed-by: Barry Song <baohua@kernel.org>
Signed-off-by: Barry Song <baohua@kernel.org>
---
arch/arm/mm/fault.c | 5 +++++
arch/arm64/mm/fault.c | 5 +++++
arch/loongarch/mm/fault.c | 4 ++++
arch/powerpc/mm/fault.c | 5 ++++-
arch/riscv/mm/fault.c | 4 ++++
arch/s390/mm/fault.c | 4 ++++
arch/x86/mm/fault.c | 4 ++++
include/linux/mm_types.h | 9 +++++----
mm/filemap.c | 5 ++++-
9 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index e62cc4be5adf..5971e02845f7 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -391,6 +391,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
@@ -420,6 +421,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
goto no_context;
return 0;
}
+
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 739800835920..d0362a3e11b7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -673,6 +673,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
if (!(mm_flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
@@ -719,6 +720,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto no_context;
return 0;
}
+
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 2c93d33356e5..738f495560c0 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -219,6 +219,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -265,6 +266,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
no_context(regs, write, address);
return;
}
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 806c74e0d5ab..cb7ffc20c760 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -487,6 +487,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -516,7 +517,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;
-
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
/* When running in the kernel we expect faults to occur only to
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 04ed6f8acae4..b94cf57c2b9a 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -347,6 +347,7 @@ void handle_page_fault(struct pt_regs *regs)
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
@@ -376,6 +377,9 @@ void handle_page_fault(struct pt_regs *regs)
no_context(regs, addr);
return;
}
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 191cc53caead..e0576e629f65 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -294,6 +294,7 @@ static void do_exception(struct pt_regs *regs, int access)
flags |= FAULT_FLAG_WRITE;
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -318,6 +319,9 @@ static void do_exception(struct pt_regs *regs, int access)
handle_fault_error_nolock(regs, 0);
return;
}
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
vma = lock_mm_and_find_vma(mm, address, regs);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f0e77e084482..0589fc693eea 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1322,6 +1322,7 @@ void do_user_addr_fault(struct pt_regs *regs,
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -1351,6 +1352,9 @@ void do_user_addr_fault(struct pt_regs *regs,
ARCH_DEFAULT_PKEY);
return;
}
+ /* Retry under the per-VMA lock if we only dropped it to wait for I/O */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a308e2c23b82..5907200ea587 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1678,10 +1678,11 @@ enum vm_fault_reason {
VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100,
VM_FAULT_LOCKED = (__force vm_fault_t)0x000200,
VM_FAULT_RETRY = (__force vm_fault_t)0x000400,
- VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800,
- VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
- VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
- VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
+ VM_FAULT_RETRY_VMA = (__force vm_fault_t)0x000800,
+ VM_FAULT_FALLBACK = (__force vm_fault_t)0x001000,
+ VM_FAULT_DONE_COW = (__force vm_fault_t)0x002000,
+ VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x004000,
+ VM_FAULT_COMPLETED = (__force vm_fault_t)0x008000,
VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
};
diff --git a/mm/filemap.c b/mm/filemap.c
index ab34cab2416a..a045b771e8de 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3525,6 +3525,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
struct folio *folio;
vm_fault_t ret = 0;
bool mapping_locked = false;
+ bool retry_by_vma_lock = false;
max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(index >= max_idx))
@@ -3621,6 +3622,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
*/
if (fpin) {
folio_unlock(folio);
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+ retry_by_vma_lock = true;
goto out_retry;
}
if (mapping_locked)
@@ -3671,7 +3674,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
filemap_invalidate_unlock_shared(mapping);
if (fpin)
fput(fpin);
- return ret | VM_FAULT_RETRY;
+ return ret | VM_FAULT_RETRY | (retry_by_vma_lock ? VM_FAULT_RETRY_VMA : 0);
}
EXPORT_SYMBOL(filemap_fault);
--
2.39.3 (Apple Git-146)
* [PATCH v2 2/5] mm/swapin: Retry swapin by VMA lock if the lock was released for I/O
From: Barry Song (Xiaomi) @ 2026-04-30 4:04 UTC
To: akpm, linux-mm, willy
Cc: david, ljs, liam, vbabka, rppt, surenb, mhocko, jack, pfalcato,
wanglian, chentao, lianux.mm, kunwu.chan, liyangouwen1, chrisl,
kasong, shikemeng, nphamcs, bhe, youngjun.park, linux-arm-kernel,
linux-kernel, loongarch, linuxppc-dev, linux-riscv, linux-s390,
Barry Song (Xiaomi)
If the current do_swap_page() took the per-VMA lock and we dropped it only
to wait for I/O completion (e.g., via folio_wait_locked()), then when
do_swap_page() is retried after the I/O completes, it should still qualify
for the per-VMA-lock path.
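
Condensed from the hunk below: the retry is marked as VMA-lock
eligible only when all three flag checks agree that the per-VMA lock
was released in order to wait for the folio lock:

	if (ret & VM_FAULT_RETRY) {
		/*
		 * fault_flag_allow_retry_first(): this is the first
		 * attempt, so VM_FAULT_RETRY means the fault lock was
		 * actually released;
		 * !FAULT_FLAG_RETRY_NOWAIT: we slept waiting for the
		 * folio lock (with NOWAIT the lock is never dropped);
		 * FAULT_FLAG_VMA_LOCK: the lock we dropped was the
		 * per-VMA lock, not mmap_lock.
		 */
		if (fault_flag_allow_retry_first(vmf->flags) &&
		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT) &&
		    (vmf->flags & FAULT_FLAG_VMA_LOCK))
			retry_by_vma_lock = true;
		goto out_release;
	}
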
Tested-by: Wang Lian <wanglian@kylinos.cn>
Tested-by: Kunwu Chan <chentao@kylinos.cn>
Reviewed-by: Wang Lian <lianux.mm@gmail.com>
Reviewed-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
---
mm/memory.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 199214f8de08..00ee1599d637 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4791,6 +4791,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
unsigned long page_idx;
unsigned long address;
pte_t *ptep;
+ bool retry_by_vma_lock = false;
if (!pte_unmap_same(vmf))
goto out;
@@ -4896,8 +4897,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
swapcache = folio;
ret |= folio_lock_or_retry(folio, vmf);
- if (ret & VM_FAULT_RETRY)
+ if (ret & VM_FAULT_RETRY) {
+ if (fault_flag_allow_retry_first(vmf->flags) &&
+ !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT) &&
+ (vmf->flags & FAULT_FLAG_VMA_LOCK))
+ retry_by_vma_lock = true;
goto out_release;
+ }
page = folio_file_page(folio, swp_offset(entry));
/*
@@ -5182,7 +5188,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
if (si)
put_swap_device(si);
- return ret;
+ return ret | (retry_by_vma_lock ? VM_FAULT_RETRY_VMA : 0);
}
static bool pte_range_none(pte_t *pte, int nr_pages)
--
2.39.3 (Apple Git-146)
* [PATCH v2 3/5] mm: Move folio_lock_or_retry() and drop __folio_lock_or_retry()
From: Barry Song (Xiaomi) @ 2026-04-30 4:04 UTC
To: akpm, linux-mm, willy
Cc: david, ljs, liam, vbabka, rppt, surenb, mhocko, jack, pfalcato,
wanglian, chentao, lianux.mm, kunwu.chan, liyangouwen1, chrisl,
kasong, shikemeng, nphamcs, bhe, youngjun.park, linux-arm-kernel,
linux-kernel, loongarch, linuxppc-dev, linux-riscv, linux-s390,
Barry Song (Xiaomi)
folio_lock_or_retry() is effectively only used in mm/memory.c,
not in the filemap code. Move it there and make it static.
The helper __folio_lock_or_retry() can then be folded into
folio_lock_or_retry() and removed.
Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
---
include/linux/pagemap.h | 17 -------------
mm/filemap.c | 45 ----------------------------------
mm/memory.c | 53 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 62 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1f50991b43e3..500ab783bf70 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1101,7 +1101,6 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
void __folio_lock(struct folio *folio);
int __folio_lock_killable(struct folio *folio);
-vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
void unlock_page(struct page *page);
void folio_unlock(struct folio *folio);
@@ -1198,22 +1197,6 @@ static inline int folio_lock_killable(struct folio *folio)
return 0;
}
-/*
- * folio_lock_or_retry - Lock the folio, unless this would block and the
- * caller indicated that it can handle a retry.
- *
- * Return value and mmap_lock implications depend on flags; see
- * __folio_lock_or_retry().
- */
-static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
- struct vm_fault *vmf)
-{
- might_sleep();
- if (!folio_trylock(folio))
- return __folio_lock_or_retry(folio, vmf);
- return 0;
-}
-
/*
* This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
* and should not be used directly.
diff --git a/mm/filemap.c b/mm/filemap.c
index a045b771e8de..b532d6cbafc8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1740,51 +1740,6 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
return ret;
}
-/*
- * Return values:
- * 0 - folio is locked.
- * non-zero - folio is not locked.
- * mmap_lock or per-VMA lock has been released (mmap_read_unlock() or
- * vma_end_read()), unless flags had both FAULT_FLAG_ALLOW_RETRY and
- * FAULT_FLAG_RETRY_NOWAIT set, in which case the lock is still held.
- *
- * If neither ALLOW_RETRY nor KILLABLE are set, will always return 0
- * with the folio locked and the mmap_lock/per-VMA lock is left unperturbed.
- */
-vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf)
-{
- unsigned int flags = vmf->flags;
-
- if (fault_flag_allow_retry_first(flags)) {
- /*
- * CAUTION! In this case, mmap_lock/per-VMA lock is not
- * released even though returning VM_FAULT_RETRY.
- */
- if (flags & FAULT_FLAG_RETRY_NOWAIT)
- return VM_FAULT_RETRY;
-
- release_fault_lock(vmf);
- if (flags & FAULT_FLAG_KILLABLE)
- folio_wait_locked_killable(folio);
- else
- folio_wait_locked(folio);
- return VM_FAULT_RETRY;
- }
- if (flags & FAULT_FLAG_KILLABLE) {
- bool ret;
-
- ret = __folio_lock_killable(folio);
- if (ret) {
- release_fault_lock(vmf);
- return VM_FAULT_RETRY;
- }
- } else {
- __folio_lock(folio);
- }
-
- return 0;
-}
-
/**
* page_cache_next_miss() - Find the next gap in the page cache.
* @mapping: Mapping.
diff --git a/mm/memory.c b/mm/memory.c
index 00ee1599d637..0c740ca363cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4442,6 +4442,59 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);
+/*
+ * folio_lock_or_retry - Lock the folio, unless this would block and the
+ * caller indicated that it can handle a retry.
+ *
+ * Return values:
+ * 0 - folio is locked.
+ * non-zero - folio is not locked.
+ * mmap_lock or per-VMA lock has been released (mmap_read_unlock() or
+ * vma_end_read()), unless flags had both FAULT_FLAG_ALLOW_RETRY and
+ * FAULT_FLAG_RETRY_NOWAIT set, in which case the lock is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 0
+ * with the folio locked and the mmap_lock/per-VMA lock is left unperturbed.
+ */
+static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
+ struct vm_fault *vmf)
+{
+ unsigned int flags = vmf->flags;
+
+ might_sleep();
+ if (folio_trylock(folio))
+ return 0;
+
+ if (fault_flag_allow_retry_first(flags)) {
+ /*
+ * CAUTION! In this case, mmap_lock/per-VMA lock is not
+ * released even though returning VM_FAULT_RETRY.
+ */
+ if (flags & FAULT_FLAG_RETRY_NOWAIT)
+ return VM_FAULT_RETRY;
+
+ release_fault_lock(vmf);
+ if (flags & FAULT_FLAG_KILLABLE)
+ folio_wait_locked_killable(folio);
+ else
+ folio_wait_locked(folio);
+ return VM_FAULT_RETRY;
+ }
+ if (flags & FAULT_FLAG_KILLABLE) {
+ bool ret;
+
+ ret = __folio_lock_killable(folio);
+ if (ret) {
+ release_fault_lock(vmf);
+ return VM_FAULT_RETRY;
+ }
+ } else {
+ __folio_lock(folio);
+ }
+
+ return 0;
+}
+
/*
* Restore a potential device exclusive pte to a working pte entry
*/
--
2.39.3 (Apple Git-146)
* [PATCH v2 4/5] mm: Don't retry page fault if folio is uptodate during swap-in
From: Barry Song (Xiaomi) @ 2026-04-30 4:04 UTC
To: akpm, linux-mm, willy
Cc: david, ljs, liam, vbabka, rppt, surenb, mhocko, jack, pfalcato,
wanglian, chentao, lianux.mm, kunwu.chan, liyangouwen1, chrisl,
kasong, shikemeng, nphamcs, bhe, youngjun.park, linux-arm-kernel,
linux-kernel, loongarch, linuxppc-dev, linux-riscv, linux-s390,
Barry Song (Xiaomi)
If we are waiting for slow I/O to complete, it makes sense to
avoid holding locks for that long. However, if the folio is
already uptodate, we are likely only waiting for a concurrent
PTE update to finish. Retrying the entire page fault seems
excessive.
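
A rough timeline of the case this optimizes (illustration only, not
kernel code):

	CPU A: do_swap_page() locks the new folio and starts swap-in I/O
	CPU B: faults on the same entry and finds the folio locked
	CPU A: I/O completes and the folio becomes uptodate
	CPU B: folio is uptodate, so CPU A is now only finishing its PTE
	       update; block briefly on the folio lock rather than
	       dropping the fault lock and retrying the whole fault
	CPU A: maps the PTE and unlocks the folio
	CPU B: takes the folio lock and completes the fault
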
Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
---
mm/memory.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/mm/memory.c b/mm/memory.c
index 0c740ca363cc..a2e4f2d87ec8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4949,6 +4949,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
swapcache = folio;
+ /*
+ * If the folio is uptodate, we are likely only waiting for
+ * another concurrent PTE mapping to complete, which should
+ * be brief. No need to drop the lock and retry the fault.
+ */
+ if (folio_test_uptodate(folio))
+ vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
ret |= folio_lock_or_retry(folio, vmf);
if (ret & VM_FAULT_RETRY) {
if (fault_flag_allow_retry_first(vmf->flags) &&
--
2.39.3 (Apple Git-146)
* Re: [PATCH v2 4/5] mm: Don't retry page fault if folio is uptodate during swap-in
From: Matthew Wilcox @ 2026-04-30 12:35 UTC
To: Barry Song (Xiaomi)
Cc: akpm, linux-mm, david, ljs, liam, vbabka, rppt, surenb, mhocko,
jack, pfalcato, wanglian, chentao, lianux.mm, kunwu.chan,
liyangouwen1, chrisl, kasong, shikemeng, nphamcs, bhe,
youngjun.park, linux-arm-kernel, linux-kernel, loongarch,
linuxppc-dev, linux-riscv, linux-s390
On Thu, Apr 30, 2026 at 12:04:26PM +0800, Barry Song (Xiaomi) wrote:
> If we are waiting for slow I/O to complete, it makes sense to
> avoid holding locks for that long. However, if the folio is
> already uptodate, we are likely only waiting for a concurrent
> PTE update to finish. Retrying the entire page fault seems
> excessive.
I think the idea is good, but the implementation is misplaced.
The folio_test_uptodate() check should live inside folio_lock_or_retry()
itself rather than having the caller tamper with FAULT_FLAG_ALLOW_RETRY.
Similarly for your next patch.
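
For concreteness, one possible shape of that (an untested sketch on
top of your patch 3; it deliberately ignores the
FAULT_FLAG_RETRY_NOWAIT interaction):

	static vm_fault_t folio_lock_or_retry(struct folio *folio,
					      struct vm_fault *vmf)
	{
		might_sleep();
		if (folio_trylock(folio))
			return 0;
		/*
		 * An uptodate folio means we are not waiting on I/O,
		 * only on a brief concurrent PTE update, so take the
		 * folio lock synchronously instead of releasing the
		 * fault lock and forcing a full retry.
		 */
		if (folio_test_uptodate(folio)) {
			if (vmf->flags & FAULT_FLAG_KILLABLE) {
				if (__folio_lock_killable(folio)) {
					release_fault_lock(vmf);
					return VM_FAULT_RETRY;
				}
				return 0;
			}
			__folio_lock(folio);
			return 0;
		}
		/* ... existing ALLOW_RETRY / KILLABLE logic ... */
	}
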
> Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
> ---
> mm/memory.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 0c740ca363cc..a2e4f2d87ec8 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4949,6 +4949,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> }
>
> swapcache = folio;
> + /*
> + * If the folio is uptodate, we are likely only waiting for
> + * another concurrent PTE mapping to complete, which should
> + * be brief. No need to drop the lock and retry the fault.
> + */
> + if (folio_test_uptodate(folio))
> + vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
> ret |= folio_lock_or_retry(folio, vmf);
> if (ret & VM_FAULT_RETRY) {
> if (fault_flag_allow_retry_first(vmf->flags) &&
> --
> 2.39.3 (Apple Git-146)
>
>
* [PATCH v2 5/5] mm/filemap: Avoid retrying page faults on uptodate folios in filemap faults
From: Barry Song (Xiaomi) @ 2026-04-30 4:04 UTC
To: akpm, linux-mm, willy
Cc: david, ljs, liam, vbabka, rppt, surenb, mhocko, jack, pfalcato,
wanglian, chentao, lianux.mm, kunwu.chan, liyangouwen1, chrisl,
kasong, shikemeng, nphamcs, bhe, youngjun.park, linux-arm-kernel,
linux-kernel, loongarch, linuxppc-dev, linux-riscv, linux-s390,
Barry Song (Xiaomi)
For uptodate folios, we are not waiting on I/O; we should be
able to acquire the folio lock shortly, so there is no need to
drop the per-VMA lock and perform a full page-fault retry.
Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
---
mm/filemap.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/mm/filemap.c b/mm/filemap.c
index b532d6cbafc8..0d2f6af5d0fe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3533,6 +3533,13 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
}
+ /*
+ * If the folio is uptodate, we are likely only waiting for
+ * another concurrent PTE mapping to complete, which should
+ * be brief. No need to drop the lock and retry the fault.
+ */
+ if (folio_test_uptodate(folio))
+ vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin))
goto out_retry;
--
2.39.3 (Apple Git-146)
* Re: [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance
From: Matthew Wilcox @ 2026-04-30 12:37 UTC
To: Barry Song (Xiaomi)
Cc: akpm, linux-mm, david, ljs, liam, vbabka, rppt, surenb, mhocko,
jack, pfalcato, wanglian, chentao, lianux.mm, kunwu.chan,
liyangouwen1, chrisl, kasong, shikemeng, nphamcs, bhe,
youngjun.park, linux-arm-kernel, linux-kernel, loongarch,
linuxppc-dev, linux-riscv, linux-s390
On Thu, Apr 30, 2026 at 12:04:22PM +0800, Barry Song (Xiaomi) wrote:
> (1) If we need to wait for I/O completion, we still drop the per-VMA lock, as
> current page fault handling already does. Holding it for too long may introduce
> various priority inversion issues on mobile devices. After I/O completes, we
> retry the page fault with the per-VMA lock, rather than falling back to
> mmap_lock.
You're going to have to do better than that. You know I hate the
additional complexity you're adding. You need to explain why my idea of
ripping out all the complexity now that we have per-VMA locks doesn't
work.