From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Baoquan He <bhe@redhat.com>,  Barry Song <baohua@kernel.org>,
	Chris Li <chrisl@kernel.org>,  Nhat Pham <nphamcs@gmail.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	 Yosry Ahmed <yosry.ahmed@linux.dev>,
	David Hildenbrand <david@redhat.com>,
	 Youngjun Park <youngjun.park@lge.com>,
	Hugh Dickins <hughd@google.com>,
	 Baolin Wang <baolin.wang@linux.alibaba.com>,
	 "Huang, Ying" <ying.huang@linux.alibaba.com>,
	 Kemeng Shi <shikemeng@huaweicloud.com>,
	 Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	 "Matthew Wilcox (Oracle)" <willy@infradead.org>,
	 linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 07/19] mm/shmem: never bypass the swap cache for SWP_SYNCHRONOUS_IO
Date: Wed, 29 Oct 2025 23:58:33 +0800
Message-ID: <20251029-swap-table-p2-v1-7-3d43f3b6ec32@tencent.com>
In-Reply-To: <20251029-swap-table-p2-v1-0-3d43f3b6ec32@tencent.com>

From: Kairui Song <kasong@tencent.com>

Now that the overhead of the swap cache is trivial to none, bypassing
the swap cache is no longer a valid optimization.

We have already removed the cache-bypassing swapin path for anon
memory; do the same for shmem. Many helpers and functions can be
dropped as a result.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
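Note for reviewers: a minimal sketch of the swapin_folio() calling
convention this patch relies on, as used in the mm/shmem.c hunk below.
swapin_folio() is introduced earlier in this series; the wrapper
swapin_one() here is hypothetical, for illustration only, and is not
part of the patch:

	/*
	 * Illustrative sketch, not kernel code. swapin_folio() either
	 * installs @new in the swap cache and reads it in, or reports
	 * that another thread raced with us:
	 *
	 *   - returns @new: we won the race, @new is now cached.
	 *   - returns another folio: a raced swapin already cached the
	 *     entry; drop @new and use the cached folio instead.
	 *   - returns NULL: a raced swapin made the entry unusable at
	 *     this order (e.g. the large entry was split), so the
	 *     caller falls back to a smaller order.
	 */
	static struct folio *swapin_one(swp_entry_t entry, struct folio *new)
	{
		struct folio *swapcache = swapin_folio(entry, new);

		if (swapcache == new)
			return new;
		folio_put(new);		/* lost the race, drop our copy */
		return swapcache;	/* cached folio, or NULL */
	}

With the bypass path gone, swapcache_clear() loses its last user, which
in turn leaves swap_entries_put_cache() with a single caller, so the
latter is open-coded into put_swap_folio() below.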
 mm/shmem.c    | 65 +++++++++++++++++------------------------------------------
 mm/swap.h     |  4 ----
 mm/swapfile.c | 35 +++++++++-----------------------
 3 files changed, 27 insertions(+), 77 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 6580f3cd24bb..759981435953 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2012,10 +2012,9 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
 		swp_entry_t entry, int order, gfp_t gfp)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct folio *new, *swapcache;
 	int nr_pages = 1 << order;
-	struct folio *new;
 	gfp_t alloc_gfp;
-	void *shadow;
 
 	/*
 	 * We have arrived here because our zones are constrained, so don't
@@ -2055,34 +2054,19 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
 		goto fallback;
 	}
 
-	/*
-	 * Prevent parallel swapin from proceeding with the swap cache flag.
-	 *
-	 * Of course there is another possible concurrent scenario as well,
-	 * that is to say, the swap cache flag of a large folio has already
-	 * been set by swapcache_prepare(), while another thread may have
-	 * already split the large swap entry stored in the shmem mapping.
-	 * In this case, shmem_add_to_page_cache() will help identify the
-	 * concurrent swapin and return -EEXIST.
-	 */
-	if (swapcache_prepare(entry, nr_pages)) {
+	swapcache = swapin_folio(entry, new);
+	if (swapcache != new) {
 		folio_put(new);
-		new = ERR_PTR(-EEXIST);
-		/* Try smaller folio to avoid cache conflict */
-		goto fallback;
+		if (!swapcache) {
+			/*
+			 * The new folio is charged already, swapin can
+			 * only fail due to another raced swapin.
+			 */
+			new = ERR_PTR(-EEXIST);
+			goto fallback;
+		}
 	}
-
-	__folio_set_locked(new);
-	__folio_set_swapbacked(new);
-	new->swap = entry;
-
-	memcg1_swapin(entry, nr_pages);
-	shadow = swap_cache_get_shadow(entry);
-	if (shadow)
-		workingset_refault(new, shadow);
-	folio_add_lru(new);
-	swap_read_folio(new, NULL);
-	return new;
+	return swapcache;
 fallback:
 	/* Order 0 swapin failed, nothing to fallback to, abort */
 	if (!order)
@@ -2172,8 +2156,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
 }
 
 static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
-					 struct folio *folio, swp_entry_t swap,
-					 bool skip_swapcache)
+					 struct folio *folio, swp_entry_t swap)
 {
 	struct address_space *mapping = inode->i_mapping;
 	swp_entry_t swapin_error;
@@ -2189,8 +2172,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 
 	nr_pages = folio_nr_pages(folio);
 	folio_wait_writeback(folio);
-	if (!skip_swapcache)
-		swap_cache_del_folio(folio);
+	swap_cache_del_folio(folio);
 	/*
 	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
 	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
@@ -2289,7 +2271,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	swp_entry_t swap, index_entry;
 	struct swap_info_struct *si;
 	struct folio *folio = NULL;
-	bool skip_swapcache = false;
 	int error, nr_pages, order;
 	pgoff_t offset;
 
@@ -2332,7 +2313,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 				folio = NULL;
 				goto failed;
 			}
-			skip_swapcache = true;
 		} else {
 			/* Cached swapin only supports order 0 folio */
 			folio = shmem_swapin_cluster(swap, gfp, info, index);
@@ -2388,9 +2368,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	 * and swap cache folios are never partially freed.
 	 */
 	folio_lock(folio);
-	if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
-	    shmem_confirm_swap(mapping, index, swap) < 0 ||
-	    folio->swap.val != swap.val) {
+	if (!folio_matches_swap_entry(folio, swap) ||
+	    shmem_confirm_swap(mapping, index, swap) < 0) {
 		error = -EEXIST;
 		goto unlock;
 	}
@@ -2422,12 +2401,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (sgp == SGP_WRITE)
 		folio_mark_accessed(folio);
 
-	if (skip_swapcache) {
-		folio->swap.val = 0;
-		swapcache_clear(si, swap, nr_pages);
-	} else {
-		swap_cache_del_folio(folio);
-	}
+	swap_cache_del_folio(folio);
 	folio_mark_dirty(folio);
 	swap_free_nr(swap, nr_pages);
 	put_swap_device(si);
@@ -2438,14 +2412,11 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (shmem_confirm_swap(mapping, index, swap) < 0)
 		error = -EEXIST;
 	if (error == -EIO)
-		shmem_set_folio_swapin_error(inode, index, folio, swap,
-					     skip_swapcache);
+		shmem_set_folio_swapin_error(inode, index, folio, swap);
 unlock:
 	if (folio)
 		folio_unlock(folio);
 failed_nolock:
-	if (skip_swapcache)
-		swapcache_clear(si, folio->swap, folio_nr_pages(folio));
 	if (folio)
 		folio_put(folio);
 	put_swap_device(si);
diff --git a/mm/swap.h b/mm/swap.h
index 214e7d041030..e0f05babe13a 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -403,10 +403,6 @@ static inline int swap_writeout(struct folio *folio,
 	return 0;
 }
 
-static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
-{
-}
-
 static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
 {
 	return NULL;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 849be32377d9..3898c3a2be62 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1613,22 +1613,6 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
 	return NULL;
 }
 
-static void swap_entries_put_cache(struct swap_info_struct *si,
-				   swp_entry_t entry, int nr)
-{
-	unsigned long offset = swp_offset(entry);
-	struct swap_cluster_info *ci;
-
-	ci = swap_cluster_lock(si, offset);
-	if (swap_only_has_cache(si, offset, nr)) {
-		swap_entries_free(si, ci, entry, nr);
-	} else {
-		for (int i = 0; i < nr; i++, entry.val++)
-			swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
-	}
-	swap_cluster_unlock(ci);
-}
-
 static bool swap_entries_put_map(struct swap_info_struct *si,
 				 swp_entry_t entry, int nr)
 {
@@ -1764,13 +1748,21 @@ void swap_free_nr(swp_entry_t entry, int nr_pages)
 void put_swap_folio(struct folio *folio, swp_entry_t entry)
 {
 	struct swap_info_struct *si;
+	struct swap_cluster_info *ci;
+	unsigned long offset = swp_offset(entry);
 	int size = 1 << swap_entry_order(folio_order(folio));
 
 	si = _swap_info_get(entry);
 	if (!si)
 		return;
 
-	swap_entries_put_cache(si, entry, size);
+	ci = swap_cluster_lock(si, offset);
+	if (swap_only_has_cache(si, offset, size))
+		swap_entries_free(si, ci, entry, size);
+	else
+		for (int i = 0; i < size; i++, entry.val++)
+			swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
+	swap_cluster_unlock(ci);
 }
 
 int __swap_count(swp_entry_t entry)
@@ -3778,15 +3770,6 @@ int swapcache_prepare(swp_entry_t entry, int nr)
 	return __swap_duplicate(entry, SWAP_HAS_CACHE, nr);
 }
 
-/*
- * Caller should ensure entries belong to the same folio so
- * the entries won't span cross cluster boundary.
- */
-void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
-{
-	swap_entries_put_cache(si, entry, nr);
-}
-
 /*
  * add_swap_count_continuation - called when a swap count is duplicated
  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's

-- 
2.51.1

