From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Baoquan He <bhe@redhat.com>, Barry Song <baohua@kernel.org>,
Kalesh Singh <kaleshsingh@google.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Ryan Roberts <ryan.roberts@arm.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 20/28] mm, swap: check swap table directly for checking cache
Date: Thu, 15 May 2025 04:17:20 +0800 [thread overview]
Message-ID: <20250514201729.48420-21-ryncsn@gmail.com> (raw)
In-Reply-To: <20250514201729.48420-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
Instead of looking at the swap map, check the swap table directly to tell
whether a swap entry has a folio in the swap cache. This prepares for
removing SWAP_HAS_CACHE.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/memory.c | 12 +++++------
mm/swap.h | 6 ++++++
mm/swap_state.c | 11 ++++++++++
mm/swapfile.c | 54 +++++++++++++++++++++++--------------------------
4 files changed, 48 insertions(+), 35 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index a70624a55aa2..a9a548575e72 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4314,15 +4314,15 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf)
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
+static inline int non_swapcache_batch(swp_entry_t entry, unsigned int max_nr)
{
- struct swap_info_struct *si = swp_info(entry);
- pgoff_t offset = swp_offset(entry);
- int i;
+ unsigned int i;
for (i = 0; i < max_nr; i++) {
- if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
- return i;
+ /* Page table lock pins the swap entries / swap device */
+ if (swap_cache_check_folio(entry))
+ break;
+ entry.val++;
}
return i;
diff --git a/mm/swap.h b/mm/swap.h
index 467996dafbae..2ae4624a0e48 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -186,6 +186,7 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
extern struct folio *swap_cache_get_folio(swp_entry_t entry);
extern struct folio *swap_cache_add_folio(swp_entry_t entry, struct folio *folio,
void **shadow, bool swapin);
+extern bool swap_cache_check_folio(swp_entry_t entry);
extern void *swap_cache_get_shadow(swp_entry_t entry);
/* Below helpers requires the caller to lock the swap cluster. */
extern void __swap_cache_del_folio(swp_entry_t entry,
@@ -395,6 +396,11 @@ static inline void *swap_cache_get_shadow(swp_entry_t end)
return NULL;
}
+static inline bool swap_cache_check_folio(swp_entry_t entry)
+{
+ return false;
+}
+
static inline unsigned int folio_swap_flags(struct folio *folio)
{
return 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index c8bb16835612..ea6a1741db5c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -266,6 +266,17 @@ struct folio *swap_cache_get_folio(swp_entry_t entry)
return folio;
}
+/*
+ * Check if a swap entry has a folio cached; may return a false positive.
+ * Caller must hold a reference to the swap device or pin it in other ways.
+ */
+bool swap_cache_check_folio(swp_entry_t entry)
+{
+ swp_te_t swp_te;
+ swp_te = __swap_table_get(swp_cluster(entry), swp_offset(entry));
+ return swp_te_is_folio(swp_te);
+}
+
/*
* If we are the only user, then try to free up the swap cache.
*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ef233466725e..0f2a499ff2c9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -181,15 +181,19 @@ static long swap_usage_in_pages(struct swap_info_struct *si)
#define TTRS_FULL 0x4
static bool swap_only_has_cache(struct swap_info_struct *si,
- unsigned long offset, int nr_pages)
+ struct swap_cluster_info *ci,
+ unsigned long offset, int nr_pages)
{
unsigned char *map = si->swap_map + offset;
unsigned char *map_end = map + nr_pages;
+ swp_te_t entry;
do {
+ entry = __swap_table_get(ci, offset);
VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
- if (*map != SWAP_HAS_CACHE)
+ if (*map)
return false;
+ offset++;
} while (++map < map_end);
return true;
@@ -247,11 +251,11 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
/*
* It's safe to delete the folio from swap cache only if the folio's
- * swap_map is HAS_CACHE only, which means the slots have no page table
+ * entry is swap cache only, which means the slots have no page table
* reference or pending writeback, and can't be allocated to others.
*/
ci = swap_lock_cluster(si, offset);
- need_reclaim = swap_only_has_cache(si, offset, nr_pages);
+ need_reclaim = swap_only_has_cache(si, ci, offset, nr_pages);
swap_unlock_cluster(ci);
if (!need_reclaim)
goto out_unlock;
@@ -660,29 +664,21 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
spin_unlock(&ci->lock);
do {
- switch (READ_ONCE(map[offset])) {
- case 0:
- offset++;
+ if (swap_count(READ_ONCE(map[offset])))
break;
- case SWAP_HAS_CACHE:
- nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
- if (nr_reclaim > 0)
- offset += nr_reclaim;
- else
- goto out;
+ nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
+ if (nr_reclaim > 0)
+ offset += nr_reclaim;
+ else if (nr_reclaim < 1)
break;
- default:
- goto out;
- }
- } while (offset < end);
-out:
+ } while (++offset < end);
spin_lock(&ci->lock);
/*
* Recheck the range no matter reclaim succeeded or not, the slot
* could have been be freed while we are not holding the lock.
*/
for (offset = start; offset < end; offset++)
- if (READ_ONCE(map[offset]))
+ if (map[offset] || !swp_te_is_null(__swap_table_get(ci, offset)))
return false;
return true;
@@ -700,16 +696,13 @@ static bool cluster_scan_range(struct swap_info_struct *si,
return true;
for (offset = start; offset < end; offset++) {
- switch (READ_ONCE(map[offset])) {
- case 0:
- continue;
- case SWAP_HAS_CACHE:
+ if (swap_count(map[offset]))
+ return false;
+ if (swp_te_is_folio(__swap_table_get(ci, offset))) {
+ VM_WARN_ON_ONCE(!(map[offset] & SWAP_HAS_CACHE));
if (!vm_swap_full())
return false;
*need_reclaim = true;
- continue;
- default:
- return false;
}
}
@@ -821,7 +814,8 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
to_scan--;
while (offset < end) {
- if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) {
+ if (!swap_count(map[offset]) &&
+ swp_te_is_folio(__swap_table_get(ci, offset))) {
spin_unlock(&ci->lock);
nr_reclaim = __try_to_reclaim_swap(si, offset,
TTRS_ANYWAY);
@@ -1590,7 +1584,7 @@ void __swap_cache_put_entries(struct swap_info_struct *si,
struct swap_cluster_info *ci,
swp_entry_t entry, unsigned int size)
{
- if (swap_only_has_cache(si, swp_offset(entry), size))
+ if (swap_only_has_cache(si, ci, swp_offset(entry), size))
swap_free_entries(si, ci, swp_offset(entry), size);
else
for (int i = 0; i < size; i++, entry.val++)
@@ -1802,6 +1796,7 @@ void do_put_swap_entries(swp_entry_t entry, int nr)
struct swap_info_struct *si;
bool any_only_cache = false;
unsigned long offset;
+ swp_te_t swp_te;
si = get_swap_device(entry);
if (WARN_ON_ONCE(!si))
@@ -1826,7 +1821,8 @@ void do_put_swap_entries(swp_entry_t entry, int nr)
*/
for (offset = start_offset; offset < end_offset; offset += nr) {
nr = 1;
- if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
+ swp_te = __swap_table_get(swp_offset_cluster(si, offset), offset);
+ if (!swap_count(si->swap_map[offset]) && swp_te_is_folio(swp_te)) {
/*
* Folios are always naturally aligned in swap so
* advance forward to the next boundary. Zero means no
--
2.49.0
next prev parent reply other threads:[~2025-05-14 20:19 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-14 20:17 [PATCH 00/28] mm, swap: introduce swap table Kairui Song
2025-05-14 20:17 ` [PATCH 01/28] mm, swap: don't scan every fragment cluster Kairui Song
2025-05-14 20:17 ` [PATCH 02/28] mm, swap: consolidate the helper for mincore Kairui Song
2025-05-14 20:17 ` [PATCH 03/28] mm/shmem, swap: remove SWAP_MAP_SHMEM Kairui Song
2025-05-14 20:17 ` [PATCH 04/28] mm, swap: split readahead update out of swap cache lookup Kairui Song
2025-05-14 20:17 ` [PATCH 05/28] mm, swap: sanitize swap cache lookup convention Kairui Song
2025-05-19 4:38 ` Barry Song
2025-05-20 3:31 ` Kairui Song
2025-05-20 4:41 ` Barry Song
2025-05-20 19:09 ` Kairui Song
2025-05-20 22:33 ` Barry Song
2025-05-21 2:45 ` Kairui Song
2025-05-21 3:24 ` Barry Song
2025-05-23 2:29 ` Barry Song
2025-05-23 20:01 ` Kairui Song
2025-05-27 7:58 ` Barry Song
2025-05-27 15:11 ` Kairui Song
2025-05-30 8:49 ` Kairui Song
2025-05-30 19:24 ` Kairui Song
2025-05-14 20:17 ` [PATCH 06/28] mm, swap: rearrange swap cluster definition and helpers Kairui Song
2025-05-19 6:26 ` Barry Song
2025-05-20 3:50 ` Kairui Song
2025-05-14 20:17 ` [PATCH 07/28] mm, swap: tidy up swap device and cluster info helpers Kairui Song
2025-05-14 20:17 ` [PATCH 08/28] mm, swap: use swap table for the swap cache and switch API Kairui Song
2025-05-14 20:17 ` [PATCH 09/28] mm/swap: rename __read_swap_cache_async to __swapin_cache_alloc Kairui Song
2025-05-14 20:17 ` [PATCH 10/28] mm, swap: add a swap helper for bypassing only read ahead Kairui Song
2025-05-14 20:17 ` [PATCH 11/28] mm, swap: clean up and consolidate helper for mTHP swapin check Kairui Song
2025-05-15 9:31 ` Klara Modin
2025-05-15 9:39 ` Kairui Song
2025-05-19 7:08 ` Barry Song
2025-05-19 11:09 ` Kairui Song
2025-05-19 11:57 ` Barry Song
2025-05-14 20:17 ` [PATCH 12/28] mm, swap: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 13/28] mm/shmem, swap: avoid redundant Xarray lookup during swapin Kairui Song
2025-05-14 20:17 ` [PATCH 14/28] mm/shmem: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 15/28] mm, swap: split locked entry freeing into a standalone helper Kairui Song
2025-05-14 20:17 ` [PATCH 16/28] mm, swap: use swap cache as the swap in synchronize layer Kairui Song
2025-05-14 20:17 ` [PATCH 17/28] mm, swap: sanitize swap entry management workflow Kairui Song
2025-05-14 20:17 ` [PATCH 18/28] mm, swap: rename and introduce folio_free_swap_cache Kairui Song
2025-05-14 20:17 ` [PATCH 19/28] mm, swap: clean up and improve swap entries batch freeing Kairui Song
2025-05-14 20:17 ` Kairui Song [this message]
2025-06-19 10:38 ` [PATCH 20/28] mm, swap: check swap table directly for checking cache Baoquan He
2025-06-19 10:50 ` Kairui Song
2025-06-20 8:04 ` Baoquan He
2025-05-14 20:17 ` [PATCH 21/28] mm, swap: add folio to swap cache directly on allocation Kairui Song
2025-05-14 20:17 ` [PATCH 22/28] mm, swap: drop the SWAP_HAS_CACHE flag Kairui Song
2025-05-14 20:17 ` [PATCH 23/28] mm, swap: remove no longer needed _swap_info_get Kairui Song
2025-05-14 20:17 ` [PATCH 24/28] mm, swap: implement helpers for reserving data in swap table Kairui Song
2025-05-15 9:40 ` Klara Modin
2025-05-16 2:35 ` Kairui Song
2025-05-14 20:17 ` [PATCH 25/28] mm/workingset: leave highest 8 bits empty for anon shadow Kairui Song
2025-05-14 20:17 ` [PATCH 26/28] mm, swap: minor clean up for swapon Kairui Song
2025-05-14 20:17 ` [PATCH 27/28] mm, swap: use swap table to track swap count Kairui Song
2025-05-14 20:17 ` [PATCH 28/28] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-05-21 18:36 ` Nhat Pham
2025-05-22 4:13 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250514201729.48420-21-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=shikemeng@huaweicloud.com \
--cc=tim.c.chen@linux.intel.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.