public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Nhat Pham <nphamcs@gmail.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, hannes@cmpxchg.org, hughd@google.com,
	yosry.ahmed@linux.dev, mhocko@kernel.org,
	roman.gushchin@linux.dev, shakeel.butt@linux.dev,
	muchun.song@linux.dev, len.brown@intel.com,
	chengming.zhou@linux.dev, kasong@tencent.com, chrisl@kernel.org,
	huang.ying.caritas@gmail.com, ryan.roberts@arm.com,
	viro@zeniv.linux.org.uk, baohua@kernel.org, osalvador@suse.de,
	lorenzo.stoakes@oracle.com, christophe.leroy@csgroup.eu,
	pavel@kernel.org, kernel-team@meta.com,
	linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-pm@vger.kernel.org
Subject: [RFC PATCH 04/14] mm: swap: swap cache support for virtualized swap
Date: Mon,  7 Apr 2025 16:42:05 -0700	[thread overview]
Message-ID: <20250407234223.1059191-5-nphamcs@gmail.com> (raw)
In-Reply-To: <20250407234223.1059191-1-nphamcs@gmail.com>

Currently, the swap cache code assumes that the swap space is of a fixed
size. The virtual swap space is dynamically sized, so the existing
partitioning code cannot be easily reused.  A dynamic partitioning is
planned, but for now keep the design simple and just use a flat
swapcache for vswap.

Since the vswap's implementation has begun to diverge from the old
implementation, we also introduce a new build config
(CONFIG_VIRTUAL_SWAP). Users who do not select this config will get the
old implementation, with no behavioral change.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 mm/Kconfig      | 13 ++++++++++
 mm/swap.h       | 22 ++++++++++------
 mm/swap_state.c | 68 +++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db06417..1a6acdb64333 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -22,6 +22,19 @@ menuconfig SWAP
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer.  If unsure say Y.
 
+config VIRTUAL_SWAP
+	bool "Swap space virtualization"
+	depends on SWAP
+	default n
+	help
+		When this is selected, the kernel is built with the new swap
+		design. This will allow us to decouple the swap backends
+		(zswap, on-disk swapfile, etc.), and save disk space when we
+		use zswap (or the zero-filled swap page optimization).
+
+		There might be more lock contentions with heavy swap use, since
+		the swap cache is no longer range partitioned.
+
 config ZSWAP
 	bool "Compressed cache for swap pages"
 	depends on SWAP
diff --git a/mm/swap.h b/mm/swap.h
index d5f8effa8015..06e20b1d79c4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -22,22 +22,27 @@ void swap_write_unplug(struct swap_iocb *sio);
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
 
-/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
+/* Return the swap device position of the swap slot. */
+static inline loff_t swap_slot_pos(swp_slot_t slot)
+{
+	return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
+}
+
 #define SWAP_ADDRESS_SPACE_SHIFT	14
 #define SWAP_ADDRESS_SPACE_PAGES	(1 << SWAP_ADDRESS_SPACE_SHIFT)
 #define SWAP_ADDRESS_SPACE_MASK		(SWAP_ADDRESS_SPACE_PAGES - 1)
+
+/* linux/mm/swap_state.c */
+#ifdef CONFIG_VIRTUAL_SWAP
+extern struct address_space *swap_address_space(swp_entry_t entry);
+#define swap_cache_index(entry) entry.val
+#else
+/* One swap address space for each 64M swap space */
 extern struct address_space *swapper_spaces[];
 #define swap_address_space(entry)			    \
 	(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
 		>> SWAP_ADDRESS_SPACE_SHIFT])
 
-/* Return the swap device position of the swap slot. */
-static inline loff_t swap_slot_pos(swp_slot_t slot)
-{
-	return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
-}
-
 /*
  * Return the swap cache index of the swap entry.
  */
@@ -46,6 +51,7 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
 	BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) != SWP_OFFSET_MASK);
 	return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
 }
+#endif
 
 void show_swap_cache_info(void);
 bool add_to_swap(struct folio *folio);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 055e555d3382..268338a0ea57 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,10 +38,19 @@ static const struct address_space_operations swap_aops = {
 #endif
 };
 
+#ifdef CONFIG_VIRTUAL_SWAP
+static struct address_space swapper_space __read_mostly;
+
+struct address_space *swap_address_space(swp_entry_t entry)
+{
+	return &swapper_space;
+}
+#else
 struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
-static bool enable_vma_readahead __read_mostly = true;
+#endif
 
+static bool enable_vma_readahead __read_mostly = true;
 #define SWAP_RA_ORDER_CEILING	5
 
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
@@ -260,6 +269,28 @@ void delete_from_swap_cache(struct folio *folio)
 	folio_ref_sub(folio, folio_nr_pages(folio));
 }
 
+#ifdef CONFIG_VIRTUAL_SWAP
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+				unsigned long end)
+{
+	swp_slot_t slot = swp_slot(type, begin);
+	swp_entry_t entry = swp_slot_to_swp_entry(slot);
+	unsigned long index = swap_cache_index(entry);
+	struct address_space *address_space = swap_address_space(entry);
+	void *old;
+	XA_STATE(xas, &address_space->i_pages, index);
+
+	xas_set_update(&xas, workingset_update_node);
+
+	xa_lock_irq(&address_space->i_pages);
+	xas_for_each(&xas, old, entry.val + end - begin) {
+		if (!xa_is_value(old))
+			continue;
+		xas_store(&xas, NULL);
+	}
+	xa_unlock_irq(&address_space->i_pages);
+}
+#else
 void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				unsigned long end)
 {
@@ -290,6 +321,7 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
 			break;
 	}
 }
+#endif
 
 /*
  * If we are the only user, then try to free up the swap cache.
@@ -718,23 +750,34 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	return folio;
 }
 
+static void init_swapper_space(struct address_space *space)
+{
+	xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
+	atomic_set(&space->i_mmap_writable, 0);
+	space->a_ops = &swap_aops;
+	/* swap cache doesn't use writeback related tags */
+	mapping_set_no_writeback_tags(space);
+}
+
+#ifdef CONFIG_VIRTUAL_SWAP
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 {
-	struct address_space *spaces, *space;
+	return 0;
+}
+
+void exit_swap_address_space(unsigned int type) {}
+#else
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+	struct address_space *spaces;
 	unsigned int i, nr;
 
 	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
 	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
 	if (!spaces)
 		return -ENOMEM;
-	for (i = 0; i < nr; i++) {
-		space = spaces + i;
-		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
-		atomic_set(&space->i_mmap_writable, 0);
-		space->a_ops = &swap_aops;
-		/* swap cache doesn't use writeback related tags */
-		mapping_set_no_writeback_tags(space);
-	}
+	for (i = 0; i < nr; i++)
+		init_swapper_space(spaces + i);
 	nr_swapper_spaces[type] = nr;
 	swapper_spaces[type] = spaces;
 
@@ -752,6 +795,7 @@ void exit_swap_address_space(unsigned int type)
 	nr_swapper_spaces[type] = 0;
 	swapper_spaces[type] = NULL;
 }
+#endif
 
 static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
 			   unsigned long *end)
@@ -930,6 +974,10 @@ static int __init swap_init_sysfs(void)
 	int err;
 	struct kobject *swap_kobj;
 
+#ifdef CONFIG_VIRTUAL_SWAP
+	init_swapper_space(&swapper_space);
+#endif
+
 	swap_kobj = kobject_create_and_add("swap", mm_kobj);
 	if (!swap_kobj) {
 		pr_err("failed to create swap kobject\n");
-- 
2.47.1


  parent reply	other threads:[~2025-04-07 23:42 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-07 23:42 [RFC PATCH 00/14] Virtual Swap Space Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 01/14] swapfile: rearrange functions Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 02/14] mm: swap: add an abstract API for locking out swapoff Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 03/14] mm: swap: add a separate type for physical swap slots Nhat Pham
2025-04-08 14:15   ` Johannes Weiner
2025-04-08 15:11     ` Nhat Pham
2025-04-22 14:41     ` Yosry Ahmed
     [not found]     ` <6807ab09.670a0220.152ca3.502fSMTPIN_ADDED_BROKEN@mx.google.com>
2025-04-22 15:50       ` Nhat Pham
2025-04-22 18:50         ` Kairui Song
2025-04-07 23:42 ` Nhat Pham [this message]
2025-04-08 15:00   ` [RFC PATCH 04/14] mm: swap: swap cache support for virtualized swap Johannes Weiner
2025-04-08 15:34     ` Nhat Pham
2025-04-08 15:43       ` Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 05/14] zswap: unify zswap tree " Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 06/14] mm: swap: allocate a virtual swap slot for each swapped out page Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 07/14] swap: implement the swap_cgroup API using virtual swap Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 08/14] swap: manage swap entry lifetime at the virtual swap layer Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 09/14] swap: implement locking out swapoff using virtual swap slot Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 10/14] mm: swap: decouple virtual swap slot from backing store Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 11/14] memcg: swap: only charge physical swap slots Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 12/14] vswap: support THP swapin and batch free_swap_and_cache Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 13/14] swap: simplify swapoff using virtual swap Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 14/14] zswap: do not start zswap shrinker if there is no physical swap slots Nhat Pham
2025-04-08 13:04 ` [RFC PATCH 00/14] Virtual Swap Space Usama Arif
2025-04-08 15:20   ` Nhat Pham
2025-04-08 15:45   ` Johannes Weiner
2025-04-08 16:25     ` Nhat Pham
2025-04-08 16:27       ` Nhat Pham
2025-04-08 16:22 ` Kairui Song
2025-04-08 16:47   ` Nhat Pham
2025-04-08 16:59     ` Kairui Song
2025-04-22 14:43       ` Yosry Ahmed
2025-04-22 14:56 ` Yosry Ahmed
     [not found] ` <6807afd0.a70a0220.2ae8b9.e07cSMTPIN_ADDED_BROKEN@mx.google.com>
2025-04-22 17:15   ` Nhat Pham
2025-04-22 19:29     ` Nhat Pham

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250407234223.1059191-5-nphamcs@gmail.com \
    --to=nphamcs@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=baohua@kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=chengming.zhou@linux.dev \
    --cc=chrisl@kernel.org \
    --cc=christophe.leroy@csgroup.eu \
    --cc=hannes@cmpxchg.org \
    --cc=huang.ying.caritas@gmail.com \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=kernel-team@meta.com \
    --cc=len.brown@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=osalvador@suse.de \
    --cc=pavel@kernel.org \
    --cc=roman.gushchin@linux.dev \
    --cc=ryan.roberts@arm.com \
    --cc=shakeel.butt@linux.dev \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yosry.ahmed@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox