From: Nhat Pham <nphamcs@gmail.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, hannes@cmpxchg.org, hughd@google.com,
yosry.ahmed@linux.dev, mhocko@kernel.org,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
muchun.song@linux.dev, len.brown@intel.com,
chengming.zhou@linux.dev, kasong@tencent.com, chrisl@kernel.org,
huang.ying.caritas@gmail.com, ryan.roberts@arm.com,
viro@zeniv.linux.org.uk, baohua@kernel.org, osalvador@suse.de,
lorenzo.stoakes@oracle.com, christophe.leroy@csgroup.eu,
pavel@kernel.org, kernel-team@meta.com,
linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
linux-pm@vger.kernel.org
Subject: [RFC PATCH 04/14] mm: swap: swap cache support for virtualized swap
Date: Mon, 7 Apr 2025 16:42:05 -0700 [thread overview]
Message-ID: <20250407234223.1059191-5-nphamcs@gmail.com> (raw)
In-Reply-To: <20250407234223.1059191-1-nphamcs@gmail.com>
Currently, the swap cache code assumes that the swap space is of a fixed
size. The virtual swap space is dynamically sized, so the existing
partitioning code cannot be easily reused. Dynamic partitioning is
planned, but for now we keep the design simple and just use a flat
swap cache for vswap.
Since the vswap implementation has begun to diverge from the old
implementation, we also introduce a new build config
(CONFIG_VIRTUAL_SWAP). Users who do not select this config will get the
old implementation, with no behavioral change.
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
mm/Kconfig | 13 ++++++++++
mm/swap.h | 22 ++++++++++------
mm/swap_state.c | 68 +++++++++++++++++++++++++++++++++++++++++--------
3 files changed, 85 insertions(+), 18 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db06417..1a6acdb64333 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -22,6 +22,19 @@ menuconfig SWAP
used to provide more virtual memory than the actual RAM present
in your computer. If unsure say Y.
+config VIRTUAL_SWAP
+ bool "Swap space virtualization"
+ depends on SWAP
+ default n
+ help
+ When this is selected, the kernel is built with the new swap
+ design. This will allow us to decouple the swap backends
+ (zswap, on-disk swapfile, etc.), and save disk space when we
+ use zswap (or the zero-filled swap page optimization).
+
+ There might be more lock contention with heavy swap use, since
+ the swap cache is no longer range-partitioned.
+
config ZSWAP
bool "Compressed cache for swap pages"
depends on SWAP
diff --git a/mm/swap.h b/mm/swap.h
index d5f8effa8015..06e20b1d79c4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -22,22 +22,27 @@ void swap_write_unplug(struct swap_iocb *sio);
int swap_writepage(struct page *page, struct writeback_control *wbc);
void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
-/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
+/* Return the swap device position of the swap slot. */
+static inline loff_t swap_slot_pos(swp_slot_t slot)
+{
+ return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
+}
+
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
#define SWAP_ADDRESS_SPACE_MASK (SWAP_ADDRESS_SPACE_PAGES - 1)
+
+/* linux/mm/swap_state.c */
+#ifdef CONFIG_VIRTUAL_SWAP
+extern struct address_space *swap_address_space(swp_entry_t entry);
+#define swap_cache_index(entry) entry.val
+#else
+/* One swap address space for each 64M swap space */
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
-/* Return the swap device position of the swap slot. */
-static inline loff_t swap_slot_pos(swp_slot_t slot)
-{
- return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
-}
-
/*
* Return the swap cache index of the swap entry.
*/
@@ -46,6 +51,7 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) != SWP_OFFSET_MASK);
return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}
+#endif
void show_swap_cache_info(void);
bool add_to_swap(struct folio *folio);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 055e555d3382..268338a0ea57 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,10 +38,19 @@ static const struct address_space_operations swap_aops = {
#endif
};
+#ifdef CONFIG_VIRTUAL_SWAP
+static struct address_space swapper_space __read_mostly;
+
+struct address_space *swap_address_space(swp_entry_t entry)
+{
+ return &swapper_space;
+}
+#else
struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
-static bool enable_vma_readahead __read_mostly = true;
+#endif
+static bool enable_vma_readahead __read_mostly = true;
#define SWAP_RA_ORDER_CEILING 5
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
@@ -260,6 +269,28 @@ void delete_from_swap_cache(struct folio *folio)
folio_ref_sub(folio, folio_nr_pages(folio));
}
+#ifdef CONFIG_VIRTUAL_SWAP
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+ unsigned long end)
+{
+ swp_slot_t slot = swp_slot(type, begin);
+ swp_entry_t entry = swp_slot_to_swp_entry(slot);
+ unsigned long index = swap_cache_index(entry);
+ struct address_space *address_space = swap_address_space(entry);
+ void *old;
+ XA_STATE(xas, &address_space->i_pages, index);
+
+ xas_set_update(&xas, workingset_update_node);
+
+ xa_lock_irq(&address_space->i_pages);
+ xas_for_each(&xas, old, entry.val + end - begin) {
+ if (!xa_is_value(old))
+ continue;
+ xas_store(&xas, NULL);
+ }
+ xa_unlock_irq(&address_space->i_pages);
+}
+#else
void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end)
{
@@ -290,6 +321,7 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
break;
}
}
+#endif
/*
* If we are the only user, then try to free up the swap cache.
@@ -718,23 +750,34 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
return folio;
}
+static void init_swapper_space(struct address_space *space)
+{
+ xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
+ atomic_set(&space->i_mmap_writable, 0);
+ space->a_ops = &swap_aops;
+ /* swap cache doesn't use writeback related tags */
+ mapping_set_no_writeback_tags(space);
+}
+
+#ifdef CONFIG_VIRTUAL_SWAP
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
- struct address_space *spaces, *space;
+ return 0;
+}
+
+void exit_swap_address_space(unsigned int type) {}
+#else
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+ struct address_space *spaces;
unsigned int i, nr;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
if (!spaces)
return -ENOMEM;
- for (i = 0; i < nr; i++) {
- space = spaces + i;
- xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
- atomic_set(&space->i_mmap_writable, 0);
- space->a_ops = &swap_aops;
- /* swap cache doesn't use writeback related tags */
- mapping_set_no_writeback_tags(space);
- }
+ for (i = 0; i < nr; i++)
+ init_swapper_space(spaces + i);
nr_swapper_spaces[type] = nr;
swapper_spaces[type] = spaces;
@@ -752,6 +795,7 @@ void exit_swap_address_space(unsigned int type)
nr_swapper_spaces[type] = 0;
swapper_spaces[type] = NULL;
}
+#endif
static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
unsigned long *end)
@@ -930,6 +974,10 @@ static int __init swap_init_sysfs(void)
int err;
struct kobject *swap_kobj;
+#ifdef CONFIG_VIRTUAL_SWAP
+ init_swapper_space(&swapper_space);
+#endif
+
swap_kobj = kobject_create_and_add("swap", mm_kobj);
if (!swap_kobj) {
pr_err("failed to create swap kobject\n");
--
2.47.1
next prev parent reply other threads:[~2025-04-07 23:42 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-07 23:42 [RFC PATCH 00/14] Virtual Swap Space Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 01/14] swapfile: rearrange functions Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 02/14] mm: swap: add an abstract API for locking out swapoff Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 03/14] mm: swap: add a separate type for physical swap slots Nhat Pham
2025-04-08 14:15 ` Johannes Weiner
2025-04-08 15:11 ` Nhat Pham
2025-04-22 14:41 ` Yosry Ahmed
[not found] ` <6807ab09.670a0220.152ca3.502fSMTPIN_ADDED_BROKEN@mx.google.com>
2025-04-22 15:50 ` Nhat Pham
2025-04-22 18:50 ` Kairui Song
2025-04-07 23:42 ` Nhat Pham [this message]
2025-04-08 15:00 ` [RFC PATCH 04/14] mm: swap: swap cache support for virtualized swap Johannes Weiner
2025-04-08 15:34 ` Nhat Pham
2025-04-08 15:43 ` Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 05/14] zswap: unify zswap tree " Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 06/14] mm: swap: allocate a virtual swap slot for each swapped out page Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 07/14] swap: implement the swap_cgroup API using virtual swap Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 08/14] swap: manage swap entry lifetime at the virtual swap layer Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 09/14] swap: implement locking out swapoff using virtual swap slot Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 10/14] mm: swap: decouple virtual swap slot from backing store Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 11/14] memcg: swap: only charge physical swap slots Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 12/14] vswap: support THP swapin and batch free_swap_and_cache Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 13/14] swap: simplify swapoff using virtual swap Nhat Pham
2025-04-07 23:42 ` [RFC PATCH 14/14] zswap: do not start zswap shrinker if there is no physical swap slots Nhat Pham
2025-04-08 13:04 ` [RFC PATCH 00/14] Virtual Swap Space Usama Arif
2025-04-08 15:20 ` Nhat Pham
2025-04-08 15:45 ` Johannes Weiner
2025-04-08 16:25 ` Nhat Pham
2025-04-08 16:27 ` Nhat Pham
2025-04-08 16:22 ` Kairui Song
2025-04-08 16:47 ` Nhat Pham
2025-04-08 16:59 ` Kairui Song
2025-04-22 14:43 ` Yosry Ahmed
2025-04-22 14:56 ` Yosry Ahmed
[not found] ` <6807afd0.a70a0220.2ae8b9.e07cSMTPIN_ADDED_BROKEN@mx.google.com>
2025-04-22 17:15 ` Nhat Pham
2025-04-22 19:29 ` Nhat Pham
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250407234223.1059191-5-nphamcs@gmail.com \
--to=nphamcs@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=chengming.zhou@linux.dev \
--cc=chrisl@kernel.org \
--cc=christophe.leroy@csgroup.eu \
--cc=hannes@cmpxchg.org \
--cc=huang.ying.caritas@gmail.com \
--cc=hughd@google.com \
--cc=kasong@tencent.com \
--cc=kernel-team@meta.com \
--cc=len.brown@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-pm@vger.kernel.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=osalvador@suse.de \
--cc=pavel@kernel.org \
--cc=roman.gushchin@linux.dev \
--cc=ryan.roberts@arm.com \
--cc=shakeel.butt@linux.dev \
--cc=viro@zeniv.linux.org.uk \
--cc=yosry.ahmed@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox