From: Nhat Pham <nphamcs@gmail.com>
To: kasong@tencent.com
Cc: Liam.Howlett@oracle.com, akpm@linux-foundation.org,
apopple@nvidia.com, axelrasmussen@google.com, baohua@kernel.org,
baolin.wang@linux.alibaba.com, bhe@redhat.com, byungchul@sk.com,
cgroups@vger.kernel.org, chengming.zhou@linux.dev,
chrisl@kernel.org, corbet@lwn.net, david@kernel.org,
dev.jain@arm.com, gourry@gourry.net, hannes@cmpxchg.org,
hughd@google.com, jannh@google.com, joshua.hahnjy@gmail.com,
lance.yang@linux.dev, lenb@kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-pm@vger.kernel.org, lorenzo.stoakes@oracle.com,
matthew.brost@intel.com, mhocko@suse.com, muchun.song@linux.dev,
npache@redhat.com, nphamcs@gmail.com, pavel@kernel.org,
peterx@redhat.com, peterz@infradead.org, pfalcato@suse.de,
rafael@kernel.org, rakie.kim@sk.com, roman.gushchin@linux.dev,
rppt@kernel.org, ryan.roberts@arm.com, shakeel.butt@linux.dev,
shikemeng@huaweicloud.com, surenb@google.com, tglx@kernel.org,
vbabka@suse.cz, weixugc@google.com, ying.huang@linux.alibaba.com,
yosry.ahmed@linux.dev, yuanchu@google.com,
zhengqi.arch@bytedance.com, ziy@nvidia.com, kernel-team@meta.com,
riel@surriel.com
Subject: [PATCH v5 11/21] zswap: move zswap entry management to the virtual swap descriptor
Date: Fri, 20 Mar 2026 12:27:25 -0700 [thread overview]
Message-ID: <20260320192735.748051-12-nphamcs@gmail.com> (raw)
In-Reply-To: <20260320192735.748051-1-nphamcs@gmail.com>
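Remove the zswap tree (the single zswap_tree xarray in mm/zswap.c) and
manage zswap entries directly through the virtual swap descriptor. This
re-partitions the zswap pool (by virtual swap cluster), which eliminates
zswap tree lock contention: storing, loading and erasing an entry now
happen under the per-cluster lock instead. Since storing an entry no
longer goes through xa_store(), the xarray error handling in
zswap_store_page() is removed as well.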
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
include/linux/zswap.h | 6 +++
mm/vswap.c | 100 ++++++++++++++++++++++++++++++++++++++++++
mm/zswap.c | 40 -----------------
3 files changed, 106 insertions(+), 40 deletions(-)
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 1a04caf283dc8..7eb3ce7e124fc 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -6,6 +6,7 @@
#include <linux/mm_types.h>
struct lruvec;
+struct zswap_entry;
extern atomic_long_t zswap_stored_pages;
@@ -33,6 +34,11 @@ void zswap_lruvec_state_init(struct lruvec *lruvec);
void zswap_folio_swapin(struct folio *folio);
bool zswap_is_enabled(void);
bool zswap_never_enabled(void);
+void *zswap_entry_store(swp_entry_t swpentry, struct zswap_entry *entry);
+void *zswap_entry_load(swp_entry_t swpentry);
+void *zswap_entry_erase(swp_entry_t swpentry);
+bool zswap_empty(swp_entry_t swpentry);
+
#else
struct zswap_lruvec_state {};
diff --git a/mm/vswap.c b/mm/vswap.c
index 3027294cd872b..9b2122647b850 100644
--- a/mm/vswap.c
+++ b/mm/vswap.c
@@ -10,6 +10,7 @@
#include <linux/swapops.h>
#include <linux/swap_cgroup.h>
#include <linux/cpuhotplug.h>
+#include <linux/zswap.h>
#include "swap.h"
#include "swap_table.h"
@@ -37,11 +38,13 @@
* Swap descriptor - metadata of a swapped out page.
*
* @slot: The handle to the physical swap slot backing this page.
+ * @zswap_entry: The zswap entry associated with this swap slot.
* @swap_cache: The folio in swap cache.
* @shadow: The shadow entry.
*/
struct swp_desc {
swp_slot_t slot;
+ struct zswap_entry *zswap_entry;
union {
struct folio *swap_cache;
void *shadow;
@@ -238,6 +241,7 @@ static void __vswap_alloc_from_cluster(struct vswap_cluster *cluster, int start)
for (i = 0; i < nr; i++) {
desc = &cluster->descriptors[start + i];
desc->slot.val = 0;
+ desc->zswap_entry = NULL;
}
cluster->count += nr;
}
@@ -1008,6 +1012,102 @@ void __swap_cache_replace_folio(struct folio *old, struct folio *new)
rcu_read_unlock();
}
+#ifdef CONFIG_ZSWAP
+/**
+ * zswap_entry_store - store a zswap entry for a swap entry
+ * @swpentry: the swap entry
+ * @entry: the zswap entry to store
+ *
+ * Stores a zswap entry in the swap descriptor for the given swap entry.
+ * The cluster is locked during the store operation.
+ *
+ * The descriptor is looked up with vswap_iter() inside an RCU read-side
+ * critical section; this function drops cluster->lock once the descriptor
+ * has been updated.
+ * NOTE(review): this relies on vswap_iter() returning with cluster->lock
+ * held when it finds a descriptor and not held when it returns NULL -
+ * confirm against its definition.
+ *
+ * If no descriptor is found for @swpentry, @entry is NOT stored and NULL is
+ * returned. The return value alone therefore cannot distinguish that case
+ * from a successful store that replaced nothing.
+ *
+ * Return: the old zswap entry if one existed, NULL otherwise
+ */
+void *zswap_entry_store(swp_entry_t swpentry, struct zswap_entry *entry)
+{
+ struct vswap_cluster *cluster = NULL;
+ struct swp_desc *desc;
+ void *old;
+
+ rcu_read_lock();
+ desc = vswap_iter(&cluster, swpentry.val);
+ if (!desc) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
+ old = desc->zswap_entry;
+ desc->zswap_entry = entry;
+ spin_unlock(&cluster->lock);
+ rcu_read_unlock();
+
+ return old;
+}
+
+/**
+ * zswap_entry_load - load a zswap entry for a swap entry
+ * @swpentry: the swap entry
+ *
+ * Loads the zswap entry from the swap descriptor for the given swap entry.
+ *
+ * The descriptor is looked up with vswap_iter() inside an RCU read-side
+ * critical section, and cluster->lock is dropped after the pointer has been
+ * read. This function takes no reference on the entry, so the returned
+ * pointer is only a snapshot of the descriptor at the time of the read.
+ * NOTE(review): callers presumably keep the entry alive by other means -
+ * confirm against the call sites.
+ *
+ * NULL is also returned when no descriptor is found for @swpentry.
+ *
+ * Return: the zswap entry if one exists, NULL otherwise
+ */
+void *zswap_entry_load(swp_entry_t swpentry)
+{
+ struct vswap_cluster *cluster = NULL;
+ struct swp_desc *desc;
+ void *zswap_entry;
+
+ rcu_read_lock();
+ desc = vswap_iter(&cluster, swpentry.val);
+ if (!desc) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
+ zswap_entry = desc->zswap_entry;
+ spin_unlock(&cluster->lock);
+ rcu_read_unlock();
+
+ return zswap_entry;
+}
+
+/**
+ * zswap_entry_erase - erase a zswap entry for a swap entry
+ * @swpentry: the swap entry
+ *
+ * Erases the zswap entry from the swap descriptor for the given swap entry.
+ * The cluster is locked during the erase operation.
+ *
+ * The descriptor's zswap_entry pointer is reset to NULL and the previous
+ * value is handed back; the entry itself is not freed here.
+ * NULL is also returned when no descriptor is found for @swpentry.
+ *
+ * Return: the zswap entry that was erased, NULL if none existed
+ */
+void *zswap_entry_erase(swp_entry_t swpentry)
+{
+ struct vswap_cluster *cluster = NULL;
+ struct swp_desc *desc;
+ void *old;
+
+ rcu_read_lock();
+ desc = vswap_iter(&cluster, swpentry.val);
+ if (!desc) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
+ old = desc->zswap_entry;
+ desc->zswap_entry = NULL;
+ spin_unlock(&cluster->lock);
+ rcu_read_unlock();
+
+ return old;
+}
+
+bool zswap_empty(swp_entry_t swpentry) /* @swpentry is not used by the body */
+{
+ return xa_empty(&vswap_cluster_map); /* tests the vswap cluster map itself */
+}
+#endif /* CONFIG_ZSWAP */
+
int vswap_init(void)
{
int i;
diff --git a/mm/zswap.c b/mm/zswap.c
index f7313261673ff..72441131f094e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -223,37 +223,6 @@ static bool zswap_has_pool;
* helpers and fwd declarations
**********************************/
-static DEFINE_XARRAY(zswap_tree);
-
-#define zswap_tree_index(entry) (entry.val)
-
-static inline void *zswap_entry_store(swp_entry_t swpentry,
- struct zswap_entry *entry)
-{
- pgoff_t offset = zswap_tree_index(swpentry);
-
- return xa_store(&zswap_tree, offset, entry, GFP_KERNEL);
-}
-
-static inline void *zswap_entry_load(swp_entry_t swpentry)
-{
- pgoff_t offset = zswap_tree_index(swpentry);
-
- return xa_load(&zswap_tree, offset);
-}
-
-static inline void *zswap_entry_erase(swp_entry_t swpentry)
-{
- pgoff_t offset = zswap_tree_index(swpentry);
-
- return xa_erase(&zswap_tree, offset);
-}
-
-static inline bool zswap_empty(swp_entry_t swpentry)
-{
- return xa_empty(&zswap_tree);
-}
-
#define zswap_pool_debug(msg, p) \
pr_debug("%s pool %s\n", msg, (p)->tfm_name)
@@ -1445,13 +1414,6 @@ static bool zswap_store_page(struct page *page,
goto compress_failed;
old = zswap_entry_store(page_swpentry, entry);
- if (xa_is_err(old)) {
- int err = xa_err(old);
-
- WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
- zswap_reject_alloc_fail++;
- goto store_failed;
- }
/*
* We may have had an existing entry that became stale when
@@ -1498,8 +1460,6 @@ static bool zswap_store_page(struct page *page,
return true;
-store_failed:
- zs_free(pool->zs_pool, entry->handle);
compress_failed:
zswap_entry_cache_free(entry);
return false;
--
2.52.0
next prev parent reply other threads:[~2026-03-20 19:27 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-20 19:27 [PATCH v5 00/21] Virtual Swap Space Nhat Pham
2026-03-20 19:27 ` [PATCH v5 01/21] mm/swap: decouple swap cache from physical swap infrastructure Nhat Pham
2026-03-20 19:27 ` [PATCH v5 02/21] swap: rearrange the swap header file Nhat Pham
2026-03-20 19:27 ` [PATCH v5 03/21] mm: swap: add an abstract API for locking out swapoff Nhat Pham
2026-03-20 19:27 ` [PATCH v5 04/21] zswap: add new helpers for zswap entry operations Nhat Pham
2026-03-20 19:27 ` [PATCH v5 05/21] mm/swap: add a new function to check if a swap entry is in swap cached Nhat Pham
2026-03-20 19:27 ` [PATCH v5 06/21] mm: swap: add a separate type for physical swap slots Nhat Pham
2026-03-20 19:27 ` [PATCH v5 07/21] mm: create scaffolds for the new virtual swap implementation Nhat Pham
2026-03-20 19:27 ` [PATCH v5 08/21] zswap: prepare zswap for swap virtualization Nhat Pham
2026-03-20 19:27 ` [PATCH v5 09/21] mm: swap: allocate a virtual swap slot for each swapped out page Nhat Pham
2026-03-20 19:27 ` [PATCH v5 10/21] swap: move swap cache to virtual swap descriptor Nhat Pham
2026-03-20 19:27 ` Nhat Pham [this message]
2026-03-20 19:27 ` [PATCH v5 12/21] swap: implement the swap_cgroup API using virtual swap Nhat Pham
2026-03-20 19:27 ` [PATCH v5 13/21] swap: manage swap entry lifecycle at the virtual swap layer Nhat Pham
2026-03-20 19:27 ` [PATCH v5 14/21] mm: swap: decouple virtual swap slot from backing store Nhat Pham
2026-03-20 19:27 ` [PATCH v5 15/21] zswap: do not start zswap shrinker if there is no physical swap slots Nhat Pham
2026-03-20 19:27 ` [PATCH v5 16/21] swap: do not unnecesarily pin readahead swap entries Nhat Pham
2026-03-20 19:27 ` [PATCH v5 17/21] swapfile: remove zeromap bitmap Nhat Pham
2026-03-20 19:27 ` [PATCH v5 18/21] memcg: swap: only charge physical swap slots Nhat Pham
2026-03-20 19:27 ` [PATCH v5 19/21] swap: simplify swapoff using virtual swap Nhat Pham
2026-03-20 19:27 ` [PATCH v5 20/21] swapfile: replace the swap map with bitmaps Nhat Pham
2026-03-20 19:27 ` [PATCH v5 21/21] vswap: batch contiguous vswap free calls Nhat Pham
2026-03-21 18:22 ` [PATCH v5 00/21] Virtual Swap Space Andrew Morton
2026-03-22 2:18 ` Roman Gushchin
2026-03-23 10:08 ` Kairui Song
2026-03-23 15:32 ` Nhat Pham
2026-03-23 16:40 ` Kairui Song
2026-03-23 20:05 ` Nhat Pham
2026-04-14 17:23 ` Nhat Pham
2026-04-14 17:32 ` Nhat Pham
2026-04-16 18:46 ` Kairui Song
2026-03-25 18:53 ` YoungJun Park
2026-04-12 1:03 ` Nhat Pham
2026-04-14 3:09 ` YoungJun Park
2026-04-20 16:02 ` Nhat Pham
2026-03-24 13:19 ` Askar Safin
2026-03-24 17:23 ` Nhat Pham
2026-03-25 2:35 ` Askar Safin
2026-03-25 18:36 ` YoungJun Park
2026-04-12 1:40 ` Nhat Pham
2026-04-14 2:50 ` YoungJun Park
2026-04-14 3:28 ` Kairui Song
2026-04-14 16:35 ` Nhat Pham
2026-04-14 7:52 ` Christoph Hellwig
2026-04-22 0:26 ` Yosry Ahmed
2026-04-22 2:18 ` Kairui Song
2026-04-22 20:27 ` Yosry Ahmed
2026-04-23 6:16 ` Kairui Song
2026-04-23 20:47 ` Yosry Ahmed
2026-04-24 4:03 ` Gregory Price
2026-04-24 4:15 ` Kairui Song
2026-04-24 4:25 ` Kairui Song
2026-04-24 17:28 ` Nhat Pham
2026-04-24 18:04 ` Kairui Song
2026-04-24 18:08 ` Yosry Ahmed
2026-04-24 18:58 ` Kairui Song
2026-04-24 19:12 ` Yosry Ahmed
2026-04-24 19:51 ` Kairui Song
2026-04-27 18:23 ` Yosry Ahmed
2026-04-28 18:46 ` Kairui Song
2026-04-28 23:53 ` Yosry Ahmed
2026-05-01 13:42 ` Nhat Pham
2026-05-01 14:16 ` Nhat Pham
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260320192735.748051-12-nphamcs@gmail.com \
--to=nphamcs@gmail.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=axelrasmussen@google.com \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=byungchul@sk.com \
--cc=cgroups@vger.kernel.org \
--cc=chengming.zhou@linux.dev \
--cc=chrisl@kernel.org \
--cc=corbet@lwn.net \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=jannh@google.com \
--cc=joshua.hahnjy@gmail.com \
--cc=kasong@tencent.com \
--cc=kernel-team@meta.com \
--cc=lance.yang@linux.dev \
--cc=lenb@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-pm@vger.kernel.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=matthew.brost@intel.com \
--cc=mhocko@suse.com \
--cc=muchun.song@linux.dev \
--cc=npache@redhat.com \
--cc=pavel@kernel.org \
--cc=peterx@redhat.com \
--cc=peterz@infradead.org \
--cc=pfalcato@suse.de \
--cc=rafael@kernel.org \
--cc=rakie.kim@sk.com \
--cc=riel@surriel.com \
--cc=roman.gushchin@linux.dev \
--cc=rppt@kernel.org \
--cc=ryan.roberts@arm.com \
--cc=shakeel.butt@linux.dev \
--cc=shikemeng@huaweicloud.com \
--cc=surenb@google.com \
--cc=tglx@kernel.org \
--cc=vbabka@suse.cz \
--cc=weixugc@google.com \
--cc=ying.huang@linux.alibaba.com \
--cc=yosry.ahmed@linux.dev \
--cc=yuanchu@google.com \
--cc=zhengqi.arch@bytedance.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.