* [RFC PATCH 1/5] mm: zswap: decompress into a folio subpage
From: fujunjie @ 2026-05-08 20:20 UTC (permalink / raw)
To: Andrew Morton, Chris Li, Kairui Song, Johannes Weiner, Nhat Pham,
Yosry Ahmed
Cc: linux-mm, linux-kernel, linux-doc, Jonathan Corbet,
David Hildenbrand, Ryan Roberts, Barry Song, Baolin Wang,
Chengming Zhou, Baoquan He, Lorenzo Stoakes
zswap_decompress() always writes to offset 0 of the target folio. That
is sufficient while zswap only loads order-0 folios, but large folio
swapin needs to fill each base page from its own zswap entry.
Pass the base-page index to zswap_decompress() and use it for the kmap
and scatterlist output offsets. Existing callers pass index 0, so this is
purely preparatory and has no intended behavior change.
Signed-off-by: fujunjie <fujunjie1@qq.com>
---
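For illustration, a minimal caller sketch of the new signature. It is not
part of this patch; fill_from_zswap() and the entries[] array are
hypothetical and only show how a later patch can fill each base page of a
large folio from its own entry:

  /* Hypothetical caller, for illustration only. */
  static bool fill_from_zswap(struct folio *folio,
                              struct zswap_entry **entries)
  {
          long i;

          for (i = 0; i < folio_nr_pages(folio); i++) {
                  /* Page i lands at byte offset i * PAGE_SIZE of the folio. */
                  if (!zswap_decompress(entries[i], folio, i))
                          return false;
          }
          return true;
  }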
mm/zswap.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 4b5149173b0e..afe38dfc5a29 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -921,12 +921,14 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
return comp_ret == 0 && alloc_ret == 0;
}
-static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
+static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio,
+ unsigned long index)
{
struct zswap_pool *pool = entry->pool;
struct scatterlist input[2]; /* zsmalloc returns an SG list 1-2 entries */
struct scatterlist output;
struct crypto_acomp_ctx *acomp_ctx;
+ size_t offset = index * PAGE_SIZE;
int ret = 0, dlen;
acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
@@ -939,14 +941,14 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
WARN_ON_ONCE(input->length != PAGE_SIZE);
- dst = kmap_local_folio(folio, 0);
+ dst = kmap_local_folio(folio, offset);
memcpy_from_sglist(dst, input, 0, PAGE_SIZE);
dlen = PAGE_SIZE;
kunmap_local(dst);
flush_dcache_folio(folio);
} else {
sg_init_table(&output, 1);
- sg_set_folio(&output, folio, PAGE_SIZE, 0);
+ sg_set_folio(&output, folio, PAGE_SIZE, offset);
acomp_request_set_params(acomp_ctx->req, input, &output,
entry->length, PAGE_SIZE);
ret = crypto_acomp_decompress(acomp_ctx->req);
@@ -1034,7 +1036,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
goto out;
}
- if (!zswap_decompress(entry, folio)) {
+ if (!zswap_decompress(entry, folio, 0)) {
ret = -EIO;
goto out;
}
@@ -1611,7 +1613,7 @@ int zswap_load(struct folio *folio)
if (!entry)
return -ENOENT;
- if (!zswap_decompress(entry, folio)) {
+ if (!zswap_decompress(entry, folio, 0)) {
folio_unlock(folio);
return -EIO;
}
--
2.34.1
* [RFC PATCH 2/5] mm: zswap: add a zswap entry batch helper
From: fujunjie @ 2026-05-08 20:20 UTC (permalink / raw)
To: Andrew Morton, Chris Li, Kairui Song, Johannes Weiner, Nhat Pham,
Yosry Ahmed
Cc: linux-mm, linux-kernel, linux-doc, Jonathan Corbet,
David Hildenbrand, Ryan Roberts, Barry Song, Baolin Wang,
Chengming Zhou, Baoquan He, Lorenzo Stoakes
Large folio swapin has to know whether a contiguous swap range is backed
consistently by zswap. swap_read_folio() selects a single backend for the
whole folio, so a range that is partly in zswap and partly on disk cannot
be read in one piece.
Add zswap_entry_batch(), mirroring the existing zeromap batch query: it
returns how many entries starting at a swap entry share the same zswap
presence, and optionally reports the first entry's state. The
CONFIG_ZSWAP=n stub reports that the whole range is not in zswap.
Signed-off-by: fujunjie <fujunjie1@qq.com>
---
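For illustration, the intended caller pattern; the surrounding variables are
hypothetical and only show how the return value and *is_zswap separate the
three possible states of a range:

  bool is_zswap;
  int nr = zswap_entry_batch(swp, nr_pages, &is_zswap);

  if (nr != nr_pages) {
          /* Mixed range: some entries are in zswap, some are not. */
  } else if (is_zswap) {
          /* Whole range in zswap: decompress it page by page. */
  } else {
          /* Whole range not in zswap: fall through to the disk backend. */
  }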
include/linux/zswap.h | 9 +++++++++
mm/zswap.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+)
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 30c193a1207e..b9d71f027200 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -27,6 +27,7 @@ struct zswap_lruvec_state {
unsigned long zswap_total_pages(void);
bool zswap_store(struct folio *folio);
int zswap_load(struct folio *folio);
+int zswap_entry_batch(swp_entry_t swp, int max_nr, bool *is_zswap);
void zswap_invalidate(swp_entry_t swp);
int zswap_swapon(int type, unsigned long nr_pages);
void zswap_swapoff(int type);
@@ -49,6 +50,14 @@ static inline int zswap_load(struct folio *folio)
return -ENOENT;
}
+static inline int zswap_entry_batch(swp_entry_t swp, int max_nr,
+ bool *is_zswap)
+{
+ if (is_zswap)
+ *is_zswap = false;
+ return max_nr;
+}
+
static inline void zswap_invalidate(swp_entry_t swp) {}
static inline int zswap_swapon(int type, unsigned long nr_pages)
{
diff --git a/mm/zswap.c b/mm/zswap.c
index afe38dfc5a29..27c14b8edd15 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -234,6 +234,42 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
>> ZSWAP_ADDRESS_SPACE_SHIFT];
}
+/*
+ * Return the number of contiguous swap entries that share the same zswap
+ * presence as @swp. If @is_zswap is not NULL, return @swp's zswap status.
+ *
+ * Context: callers must keep the swap type alive. The result is a snapshot
+ * of zswap xarray presence; callers must tolerate races by rechecking under
+ * the lock that matters for their operation or by falling back safely.
+ */
+int zswap_entry_batch(swp_entry_t swp, int max_nr, bool *is_zswap)
+{
+ pgoff_t offset = swp_offset(swp);
+ bool first;
+ int i;
+
+ if (zswap_never_enabled()) {
+ if (is_zswap)
+ *is_zswap = false;
+ return max_nr;
+ }
+
+ first = !!xa_load(swap_zswap_tree(swp), offset);
+ if (is_zswap)
+ *is_zswap = first;
+
+ for (i = 1; i < max_nr; i++) {
+ swp_entry_t entry = swp_entry(swp_type(swp), offset + i);
+ bool present;
+
+ present = !!xa_load(swap_zswap_tree(entry), offset + i);
+ if (present != first)
+ return i;
+ }
+
+ return max_nr;
+}
+
#define zswap_pool_debug(msg, p) \
pr_debug("%s pool %s\n", msg, (p)->tfm_name)
--
2.34.1
* [RFC PATCH 3/5] mm: zswap: load fully stored large folios
From: fujunjie @ 2026-05-08 20:20 UTC (permalink / raw)
To: Andrew Morton, Chris Li, Kairui Song, Johannes Weiner, Nhat Pham,
Yosry Ahmed
Cc: linux-mm, linux-kernel, linux-doc, Jonathan Corbet,
David Hildenbrand, Ryan Roberts, Barry Song, Baolin Wang,
Chengming Zhou, Baoquan He, Lorenzo Stoakes
zswap_store() already stores every base page of a large folio as a
separate zswap entry and cleans up all of the folio's entries if any page
fails to store. The load side still rejects any large folio, which forces
the fault path to avoid mTHP swapin once zswap has ever been enabled.
Use zswap_entry_batch() to distinguish three cases: the whole range is
absent from zswap and should fall through to the disk backend, the whole
range is present and can be decompressed one base page at a time, or the
range is mixed and must be treated as an invalid large-folio backend
selection.
After all entries decompress successfully, mark the folio uptodate and
dirty, account the mTHP swpin stat once for the folio, account one ZSWPIN
event per base page, and invalidate each zswap entry because the
swapcache folio becomes authoritative.
Signed-off-by: fujunjie <fujunjie1@qq.com>
---
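The new zswap_load() flow for a large folio, condensed; this is only a
summary of the diff below, not additional code:

  nr = zswap_entry_batch(folio->swap, nr_pages, &is_zswap);
  if (nr != nr_pages)
          return -EINVAL;   /* mixed range, the caller must not get here */
  if (!is_zswap)
          return -ENOENT;   /* whole range on disk, use swap_read_folio() */
  for (index = 0; index < nr_pages; index++)
          /* look up the entry at offset + index, then decompress it;
           * any failure unlocks the folio and returns -EIO */
          zswap_decompress(entry, folio, index);
  /* mark uptodate and dirty, count stats, erase and free each entry */
  return 0;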
Documentation/admin-guide/mm/transhuge.rst | 4 +-
mm/zswap.c | 65 ++++++++++++++--------
2 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 5fbc3d89bb07..05456906aff6 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -644,8 +644,8 @@ zswpout
piece without splitting.
swpin
- is incremented every time a huge page is swapped in from a non-zswap
- swap device in one piece.
+ is incremented every time a huge page is swapped in from swap I/O or
+ zswap in one piece.
swpin_fallback
is incremented if swapin fails to allocate or charge a huge page
diff --git a/mm/zswap.c b/mm/zswap.c
index 27c14b8edd15..863ca1e896ed 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -28,6 +28,7 @@
#include <crypto/acompress.h>
#include <crypto/scatterwalk.h>
#include <linux/zswap.h>
+#include <linux/huge_mm.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
@@ -1614,20 +1615,23 @@ bool zswap_store(struct folio *folio)
* NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page()
* will SIGBUS).
*
- * -EINVAL: if the swapped out content was in zswap, but the page belongs
- * to a large folio, which is not supported by zswap. The folio is unlocked,
- * but NOT marked up-to-date, so that an IO error is emitted (e.g.
- * do_swap_page() will SIGBUS).
+ * -EINVAL: if the folio spans a mix of zswap and non-zswap entries. The
+ * folio is unlocked, but NOT marked up-to-date, so that an IO error is
+ * emitted (e.g. do_swap_page() will SIGBUS). Large folio swapin should
+ * reject such ranges before calling zswap_load().
*
- * -ENOENT: if the swapped out content was not in zswap. The folio remains
+ * -ENOENT: if the swapped out content was not in zswap. For a large folio,
+ * this means the whole folio range was not in zswap. The folio remains
* locked on return.
*/
int zswap_load(struct folio *folio)
{
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
- struct xarray *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
+ int nr_pages = folio_nr_pages(folio);
+ bool is_zswap;
+ int index;
VM_WARN_ON_ONCE(!folio_test_locked(folio));
VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1635,30 +1639,36 @@ int zswap_load(struct folio *folio)
if (zswap_never_enabled())
return -ENOENT;
- /*
- * Large folios should not be swapped in while zswap is being used, as
- * they are not properly handled. Zswap does not properly load large
- * folios, and a large folio may only be partially in zswap.
- */
- if (WARN_ON_ONCE(folio_test_large(folio))) {
+ if (zswap_entry_batch(swp, nr_pages, &is_zswap) != nr_pages) {
+ WARN_ON_ONCE(folio_test_large(folio));
folio_unlock(folio);
return -EINVAL;
}
- entry = xa_load(tree, offset);
- if (!entry)
+ if (!is_zswap)
return -ENOENT;
- if (!zswap_decompress(entry, folio, 0)) {
- folio_unlock(folio);
- return -EIO;
+ for (index = 0; index < nr_pages; index++) {
+ swp_entry_t entry_swp = swp_entry(swp_type(swp),
+ offset + index);
+ struct xarray *tree = swap_zswap_tree(entry_swp);
+
+ entry = xa_load(tree, offset + index);
+ if (WARN_ON_ONCE(!entry)) {
+ folio_unlock(folio);
+ return -EINVAL;
+ }
+
+ if (!zswap_decompress(entry, folio, index)) {
+ folio_unlock(folio);
+ return -EIO;
+ }
}
folio_mark_uptodate(folio);
- count_vm_event(ZSWPIN);
- if (entry->objcg)
- count_objcg_events(entry->objcg, ZSWPIN, 1);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
+ count_vm_events(ZSWPIN, nr_pages);
/*
* We are reading into the swapcache, invalidate zswap entry.
@@ -1668,8 +1678,19 @@ int zswap_load(struct folio *folio)
* compression work.
*/
folio_mark_dirty(folio);
- xa_erase(tree, offset);
- zswap_entry_free(entry);
+
+ for (index = 0; index < nr_pages; index++) {
+ swp_entry_t entry_swp = swp_entry(swp_type(swp),
+ offset + index);
+ struct xarray *tree = swap_zswap_tree(entry_swp);
+
+ entry = xa_erase(tree, offset + index);
+ if (WARN_ON_ONCE(!entry))
+ continue;
+ if (entry->objcg)
+ count_objcg_events(entry->objcg, ZSWPIN, 1);
+ zswap_entry_free(entry);
+ }
folio_unlock(folio);
return 0;
--
2.34.1
* [RFC PATCH 4/5] mm: swap: fall back to order-0 after large swapin races
From: fujunjie @ 2026-05-08 20:20 UTC (permalink / raw)
To: Andrew Morton, Chris Li, Kairui Song, Johannes Weiner, Nhat Pham,
Yosry Ahmed
Cc: linux-mm, linux-kernel, linux-doc, Jonathan Corbet,
David Hildenbrand, Ryan Roberts, Barry Song, Baolin Wang,
Chengming Zhou, Baoquan He, Lorenzo Stoakes
swapin_folio() documents that a large folio insertion race returns NULL
so the caller can fall back to order-0 swapin. do_swap_page() currently
turns that NULL into VM_FAULT_OOM if the PTE is unchanged, which is
harsher than necessary and gets in the way of rejecting large folio
ranges for backend reasons.
Move the synchronous swapin sequence into a helper and retry with an
order-0 folio when a large folio cannot be inserted into the swap cache.
Count the event as an mTHP swapin fallback before dropping the failed
large allocation.
Signed-off-by: fujunjie <fujunjie1@qq.com>
---
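The return contract of the new helper, condensed as a review aid; this only
summarizes the diff below:

  /*
   * swapcache == folio              swapin succeeded with this folio
   * swapcache && swapcache != folio the entry is already cached, the caller
   *                                 uses that folio instead
   * swapcache == NULL, large folio  count MTHP_STAT_SWPIN_FALLBACK, then
   *                                 retry once with an order-0 folio
   * swapcache == NULL, order-0      return NULL and keep do_swap_page()'s
   *                                 existing handling
   */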
mm/memory.c | 50 +++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 39 insertions(+), 11 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index ea6568571131..84e3b77b8293 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4757,6 +4757,44 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static struct folio *swapin_synchronous_folio(swp_entry_t entry,
+ struct vm_fault *vmf)
+{
+ struct folio *swapcache, *folio;
+ bool large;
+ int order;
+
+ folio = alloc_swap_folio(vmf);
+ if (!folio)
+ return NULL;
+
+ large = folio_test_large(folio);
+ order = folio_order(folio);
+
+ /*
+ * folio is charged, so swapin can only fail due to raced swapin and
+ * return NULL.
+ */
+ swapcache = swapin_folio(entry, folio);
+ if (swapcache == folio)
+ return folio;
+
+ if (!swapcache && large)
+ count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
+ folio_put(folio);
+ if (swapcache || !large)
+ return swapcache;
+
+ folio = __alloc_swap_folio(vmf);
+ if (!folio)
+ return NULL;
+
+ swapcache = swapin_folio(entry, folio);
+ if (swapcache != folio)
+ folio_put(folio);
+ return swapcache;
+}
+
/* Sanity check that a folio is fully exclusive */
static void check_swap_exclusive(struct folio *folio, swp_entry_t entry,
unsigned int nr_pages)
@@ -4860,17 +4898,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
swap_update_readahead(folio, vma, vmf->address);
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
- folio = alloc_swap_folio(vmf);
- if (folio) {
- /*
- * folio is charged, so swapin can only fail due
- * to raced swapin and return NULL.
- */
- swapcache = swapin_folio(entry, folio);
- if (swapcache != folio)
- folio_put(folio);
- folio = swapcache;
- }
+ folio = swapin_synchronous_folio(entry, vmf);
} else {
folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
}
--
2.34.1
* [RFC PATCH 5/5] mm: swap: allow zswap-backed large folio swapin
From: fujunjie @ 2026-05-08 20:20 UTC (permalink / raw)
To: Andrew Morton, Chris Li, Kairui Song, Johannes Weiner, Nhat Pham,
Yosry Ahmed
Cc: linux-mm, linux-kernel, linux-doc, Jonathan Corbet,
David Hildenbrand, Ryan Roberts, Barry Song, Baolin Wang,
Chengming Zhou, Baoquan He, Lorenzo Stoakes
alloc_swap_folio() has been falling back to order-0 in the anonymous
synchronous swapin path whenever zswap was ever enabled, because a large
folio range could contain a mixture of zswap and non-zswap entries and
zswap_load() could not handle large folios.
zswap_load() can now load a range that is fully present in zswap, and
zswap_entry_batch() can identify mixed zswap ranges. Use that check
alongside the existing zeromap and swapcache checks when selecting a large
folio for anonymous swapin, and recheck before inserting a large folio into
the swap cache while holding the swap cluster lock.
With mixed zswap ranges rejected and the insertion-race fallback in place,
remove the blanket zswap_never_enabled() fallback from the anonymous swapin
path so all-zswap and all-disk anonymous ranges can use mTHP swapin. Shmem
keeps its existing zswap fallback and is outside this RFC.
Signed-off-by: fujunjie <fujunjie1@qq.com>
---
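Why the range is checked twice, condensed; illustration only, using the
function names from the diffs below:

  /* Fault path, cluster lock not held: cheap pre-check in can_swapin_thp(). */
  if (zswap_entry_batch(entry, nr_pages, NULL) != nr_pages)
          goto fallback;    /* mixed range, swap in order-0 folios */

  /* ... a concurrent zswap store or writeback may still change the range ... */

  /* swap_cache_add_folio(), swap cluster lock held: recheck before insertion. */
  if (folio_test_large(folio) &&
      zswap_entry_batch(entry, nr_pages, NULL) != nr_pages)
          return -EAGAIN;   /* swapin_folio() returns NULL, caller retries order-0 */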
mm/memory.c | 21 ++++++---------------
mm/swap_state.c | 23 +++++++++++++++--------
2 files changed, 21 insertions(+), 23 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 84e3b77b8293..0be249108de1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -78,6 +78,7 @@
#include <linux/sched/sysctl.h>
#include <linux/pgalloc.h>
#include <linux/uaccess.h>
+#include <linux/zswap.h>
#include <trace/events/kmem.h>
@@ -4635,13 +4636,11 @@ static bool can_swapin_thp(struct vm_fault *vmf, pte_t *ptep, int nr_pages)
if (swap_pte_batch(ptep, nr_pages, pte) != nr_pages)
return false;
- /*
- * swap_read_folio() can't handle the case a large folio is hybridly
- * from different backends. And they are likely corner cases. Similar
- * things might be added once zswap support large folios.
- */
+ /* swap_read_folio() can't handle hybrid backend large folios. */
if (unlikely(swap_zeromap_batch(entry, nr_pages, NULL) != nr_pages))
return false;
+ if (unlikely(zswap_entry_batch(entry, nr_pages, NULL) != nr_pages))
+ return false;
if (unlikely(non_swapcache_batch(entry, nr_pages) != nr_pages))
return false;
@@ -4690,14 +4689,6 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
if (unlikely(userfaultfd_armed(vma)))
goto fallback;
- /*
- * A large swapped out folio could be partially or fully in zswap. We
- * lack handling for such cases, so fallback to swapping in order-0
- * folio.
- */
- if (!zswap_never_enabled())
- goto fallback;
-
entry = softleaf_from_pte(vmf->orig_pte);
/*
* Get a list of all the (large) orders below PMD_ORDER that are enabled
@@ -4772,8 +4763,8 @@ static struct folio *swapin_synchronous_folio(swp_entry_t entry,
order = folio_order(folio);
/*
- * folio is charged, so swapin can only fail due to raced swapin and
- * return NULL.
+ * folio is charged, so NULL means the large folio could not be
+ * inserted and needs order-0 fallback.
*/
swapcache = swapin_folio(entry, folio);
if (swapcache == folio)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 1415a5c54a43..4e58fad5e5f0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
+#include <linux/zswap.h>
#include "internal.h"
#include "swap_table.h"
#include "swap.h"
@@ -207,6 +208,11 @@ static int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
if (swp_tb_is_shadow(old_tb))
shadow = swp_tb_to_shadow(old_tb);
} while (++ci_off < ci_end);
+ if (unlikely(folio_test_large(folio) &&
+ zswap_entry_batch(entry, nr_pages, NULL) != nr_pages)) {
+ err = -EAGAIN;
+ goto failed;
+ }
__swap_cache_add_folio(ci, folio, entry);
swap_cluster_unlock(ci);
if (shadowp)
@@ -460,7 +466,8 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
*
* Context: Caller must protect the swap device with reference count or locks.
* Return: Returns the folio being added on success. Returns the existing folio
- * if @entry is already cached. Returns NULL if raced with swapin or swapoff.
+ * if @entry is already cached. Returns NULL if raced with swapin or swapoff,
+ * or if a large folio fails a backend recheck before insertion.
*/
static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
struct folio *folio,
@@ -483,10 +490,10 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
/*
* Large order allocation needs special handling on
- * race: if a smaller folio exists in cache, swapin needs
- * to fallback to order 0, and doing a swap cache lookup
- * might return a folio that is irrelevant to the faulting
- * entry because @entry is aligned down. Just return NULL.
+ * race or backend recheck failure: swapin needs to fall back
+ * to order 0, and doing a swap cache lookup might return a
+ * folio that is irrelevant to the faulting entry because
+ * @entry is aligned down. Just return NULL.
*/
if (ret != -EEXIST || folio_test_large(folio))
goto failed;
@@ -567,9 +574,9 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
* with the folio size.
*
* Return: returns pointer to @folio on success. If folio is a large folio
- * and this raced with another swapin, NULL will be returned to allow fallback
- * to order 0. Else, if another folio was already added to the swap cache,
- * return that swap cache folio instead.
+ * and it raced with another swapin or failed a backend recheck, NULL will be
+ * returned to allow fallback to order 0. Else, if another folio was already
+ * added to the swap cache, return that swap cache folio instead.
*/
struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
{
--
2.34.1