From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Baoquan He <bhe@redhat.com>, Barry Song <baohua@kernel.org>,
Kalesh Singh <kaleshsingh@google.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Ryan Roberts <ryan.roberts@arm.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 10/28] mm, swap: add a swap helper for bypassing only read ahead
Date: Thu, 15 May 2025 04:17:10 +0800 [thread overview]
Message-ID: <20250514201729.48420-11-ryncsn@gmail.com> (raw)
In-Reply-To: <20250514201729.48420-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
The swap cache now has very low overhead, so bypassing it is no longer
helpful. To prepare for unifying the swap-in path, introduce a new
helper that bypasses only readahead and does not bypass the swap cache.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swap.h | 6 ++
mm/swap_state.c | 158 ++++++++++++++++++++++++++++++------------------
2 files changed, 105 insertions(+), 59 deletions(-)
diff --git a/mm/swap.h b/mm/swap.h
index fec7d6e751ae..aab6bf9c3a8a 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -217,6 +217,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
struct mempolicy *mpol, pgoff_t ilx);
struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
struct vm_fault *vmf);
+struct folio *swapin_entry(swp_entry_t entry, struct folio *folio);
void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr);
@@ -303,6 +304,11 @@ static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
return NULL;
}
+static inline struct folio *swapin_entry(swp_entry_t ent, struct folio *folio)
+{
+ return NULL;
+}
+
static inline void swap_update_readahead(struct folio *folio,
struct vm_area_struct *vma, unsigned long addr)
{
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fe71706e29d9..d68687295f52 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -353,54 +353,26 @@ void swap_update_readahead(struct folio *folio,
}
}
-struct folio *__swapin_cache_alloc(swp_entry_t entry, gfp_t gfp_mask,
- struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
- bool skip_if_exists)
+static struct folio *__swapin_cache_add_prepare(swp_entry_t entry,
+ struct folio *folio,
+ bool skip_if_exists)
{
- struct swap_info_struct *si = swp_info(entry);
- struct folio *folio;
- struct folio *new_folio = NULL;
- struct folio *result = NULL;
+ int nr_pages = folio_nr_pages(folio);
+ struct folio *exist;
void *shadow = NULL;
+ int err;
- *new_page_allocated = false;
for (;;) {
- int err;
-
/*
- * Check the swap cache first, if a cached folio is found,
- * return it unlocked. The caller will lock and check it.
+ * The caller should have checked the swap cache and swap count
+ * already, so try to prepare the swap map directly. It will still
+ * fail with -ENOENT or -EEXIST if the entry is gone or raced.
*/
- folio = swap_cache_get_folio(entry);
- if (folio)
- goto got_folio;
-
- /*
- * Just skip read ahead for unused swap slot.
- */
- if (!swap_entry_swapped(si, entry))
- goto put_and_return;
-
- /*
- * Get a new folio to read into from swap. Allocate it now if
- * new_folio not exist, before marking swap_map SWAP_HAS_CACHE,
- * when -EEXIST will cause any racers to loop around until we
- * add it to cache.
- */
- if (!new_folio) {
- new_folio = folio_alloc_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
- if (!new_folio)
- goto put_and_return;
- }
-
- /*
- * Swap entry may have been freed since our caller observed it.
- */
- err = swapcache_prepare(entry, 1);
+ err = swapcache_prepare(entry, nr_pages);
if (!err)
break;
else if (err != -EEXIST)
- goto put_and_return;
+ return NULL;
/*
* Protect against a recursive call to __swapin_cache_alloc()
@@ -411,7 +383,11 @@ struct folio *__swapin_cache_alloc(swp_entry_t entry, gfp_t gfp_mask,
* __swapin_cache_alloc() in the writeback path.
*/
if (skip_if_exists)
- goto put_and_return;
+ return NULL;
+
+ exist = swap_cache_get_folio(entry);
+ if (exist)
+ return exist;
/*
* We might race against __swap_cache_del_folio(), and
@@ -426,35 +402,99 @@ struct folio *__swapin_cache_alloc(swp_entry_t entry, gfp_t gfp_mask,
/*
* The swap entry is ours to swap in. Prepare the new folio.
*/
- __folio_set_locked(new_folio);
- __folio_set_swapbacked(new_folio);
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
- if (mem_cgroup_swapin_charge_folio(new_folio, NULL, gfp_mask, entry))
- goto fail_unlock;
-
- if (swap_cache_add_folio(entry, new_folio, &shadow))
+ if (swap_cache_add_folio(entry, folio, &shadow))
goto fail_unlock;
memcg1_swapin(entry, 1);
if (shadow)
- workingset_refault(new_folio, shadow);
+ workingset_refault(folio, shadow);
/* Caller will initiate read into locked new_folio */
- folio_add_lru(new_folio);
- *new_page_allocated = true;
- folio = new_folio;
-got_folio:
- result = folio;
- goto put_and_return;
+ folio_add_lru(folio);
+ return folio;
fail_unlock:
- put_swap_folio(new_folio, entry);
- folio_unlock(new_folio);
-put_and_return:
- if (!(*new_page_allocated) && new_folio)
- folio_put(new_folio);
- return result;
+ put_swap_folio(folio, entry);
+ folio_unlock(folio);
+ return NULL;
+}
+
+struct folio *__swapin_cache_alloc(swp_entry_t entry, gfp_t gfp_mask,
+ struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
+ bool skip_if_exists)
+{
+ struct swap_info_struct *si = swp_info(entry);
+ struct folio *swapcache = NULL, *folio = NULL;
+
+ /*
+ * Check the swap cache first, if a cached folio is found,
+ * return it unlocked. The caller will lock and check it.
+ */
+ swapcache = swap_cache_get_folio(entry);
+ if (swapcache)
+ goto out;
+
+ /*
+ * Just skip read ahead for unused swap slot.
+ */
+ if (!swap_entry_swapped(si, entry))
+ goto out;
+
+ /*
+ * Get a new folio to read into from swap. Allocate it now,
+ * before marking swap_map SWAP_HAS_CACHE, because from then on
+ * -EEXIST will cause any racers to loop around until we add
+ * it to the cache.
+ */
+ folio = folio_alloc_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
+ if (!folio)
+ goto out;
+
+ if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
+ goto out;
+
+ swapcache = __swapin_cache_add_prepare(entry, folio, skip_if_exists);
+out:
+ if (swapcache && swapcache == folio) {
+ *new_page_allocated = true;
+ } else {
+ if (folio)
+ folio_put(folio);
+ *new_page_allocated = false;
+ }
+
+ return swapcache;
+}
+
+/**
+ * swapin_entry - swap-in one or multiple entries skipping readahead
+ *
+ * @entry: swap entry to swap in
+ * @folio: pre-allocated folio
+ *
+ * Reads @entry into @folio. @folio will be added to the swap cache first. If
+ * this races with other users, only one user will successfully add its
+ * folio into the swap cache, and that folio will be returned to all readers.
+ *
+ * If @folio is a large folio, the entry will be rounded down to match
+ * the folio start and the whole folio will be read in.
+ */
+struct folio *swapin_entry(swp_entry_t entry, struct folio *folio)
+{
+ struct folio *swapcache;
+ pgoff_t offset = swp_offset(entry);
+ unsigned long nr_pages = folio_nr_pages(folio);
+ VM_WARN_ON_ONCE(nr_pages > SWAPFILE_CLUSTER);
+
+ entry = swp_entry(swp_type(entry), ALIGN_DOWN(offset, nr_pages));
+ swapcache = __swapin_cache_add_prepare(entry, folio, false);
+ if (swapcache == folio)
+ swap_read_folio(folio, NULL);
+ return swapcache;
}
/*
--
2.49.0
next prev parent reply other threads:[~2025-05-14 20:18 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-14 20:17 [PATCH 00/28] mm, swap: introduce swap table Kairui Song
2025-05-14 20:17 ` [PATCH 01/28] mm, swap: don't scan every fragment cluster Kairui Song
2025-05-14 20:17 ` [PATCH 02/28] mm, swap: consolidate the helper for mincore Kairui Song
2025-05-14 20:17 ` [PATCH 03/28] mm/shmem, swap: remove SWAP_MAP_SHMEM Kairui Song
2025-05-14 20:17 ` [PATCH 04/28] mm, swap: split readahead update out of swap cache lookup Kairui Song
2025-05-14 20:17 ` [PATCH 05/28] mm, swap: sanitize swap cache lookup convention Kairui Song
2025-05-19 4:38 ` Barry Song
2025-05-20 3:31 ` Kairui Song
2025-05-20 4:41 ` Barry Song
2025-05-20 19:09 ` Kairui Song
2025-05-20 22:33 ` Barry Song
2025-05-21 2:45 ` Kairui Song
2025-05-21 3:24 ` Barry Song
2025-05-23 2:29 ` Barry Song
2025-05-23 20:01 ` Kairui Song
2025-05-27 7:58 ` Barry Song
2025-05-27 15:11 ` Kairui Song
2025-05-30 8:49 ` Kairui Song
2025-05-30 19:24 ` Kairui Song
2025-05-14 20:17 ` [PATCH 06/28] mm, swap: rearrange swap cluster definition and helpers Kairui Song
2025-05-19 6:26 ` Barry Song
2025-05-20 3:50 ` Kairui Song
2025-05-14 20:17 ` [PATCH 07/28] mm, swap: tidy up swap device and cluster info helpers Kairui Song
2025-05-14 20:17 ` [PATCH 08/28] mm, swap: use swap table for the swap cache and switch API Kairui Song
2025-05-14 20:17 ` [PATCH 09/28] mm/swap: rename __read_swap_cache_async to __swapin_cache_alloc Kairui Song
2025-05-14 20:17 ` Kairui Song [this message]
2025-05-14 20:17 ` [PATCH 11/28] mm, swap: clean up and consolidate helper for mTHP swapin check Kairui Song
2025-05-15 9:31 ` Klara Modin
2025-05-15 9:39 ` Kairui Song
2025-05-19 7:08 ` Barry Song
2025-05-19 11:09 ` Kairui Song
2025-05-19 11:57 ` Barry Song
2025-05-14 20:17 ` [PATCH 12/28] mm, swap: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 13/28] mm/shmem, swap: avoid redundant Xarray lookup during swapin Kairui Song
2025-05-14 20:17 ` [PATCH 14/28] mm/shmem: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 15/28] mm, swap: split locked entry freeing into a standalone helper Kairui Song
2025-05-14 20:17 ` [PATCH 16/28] mm, swap: use swap cache as the swap in synchronize layer Kairui Song
2025-05-14 20:17 ` [PATCH 17/28] mm, swap: sanitize swap entry management workflow Kairui Song
2025-05-14 20:17 ` [PATCH 18/28] mm, swap: rename and introduce folio_free_swap_cache Kairui Song
2025-05-14 20:17 ` [PATCH 19/28] mm, swap: clean up and improve swap entries batch freeing Kairui Song
2025-05-14 20:17 ` [PATCH 20/28] mm, swap: check swap table directly for checking cache Kairui Song
2025-06-19 10:38 ` Baoquan He
2025-06-19 10:50 ` Kairui Song
2025-06-20 8:04 ` Baoquan He
2025-05-14 20:17 ` [PATCH 21/28] mm, swap: add folio to swap cache directly on allocation Kairui Song
2025-05-14 20:17 ` [PATCH 22/28] mm, swap: drop the SWAP_HAS_CACHE flag Kairui Song
2025-05-14 20:17 ` [PATCH 23/28] mm, swap: remove no longer needed _swap_info_get Kairui Song
2025-05-14 20:17 ` [PATCH 24/28] mm, swap: implement helpers for reserving data in swap table Kairui Song
2025-05-15 9:40 ` Klara Modin
2025-05-16 2:35 ` Kairui Song
2025-05-14 20:17 ` [PATCH 25/28] mm/workingset: leave highest 8 bits empty for anon shadow Kairui Song
2025-05-14 20:17 ` [PATCH 26/28] mm, swap: minor clean up for swapon Kairui Song
2025-05-14 20:17 ` [PATCH 27/28] mm, swap: use swap table to track swap count Kairui Song
2025-05-14 20:17 ` [PATCH 28/28] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-05-21 18:36 ` Nhat Pham
2025-05-22 4:13 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250514201729.48420-11-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=shikemeng@huaweicloud.com \
--cc=tim.c.chen@linux.intel.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.