From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
Barry Song <baohua@kernel.org>, Baoquan He <bhe@redhat.com>,
Nhat Pham <nphamcs@gmail.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Ying Huang <ying.huang@linux.alibaba.com>,
Johannes Weiner <hannes@cmpxchg.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH v2 06/15] mm, swap: rename and move some swap cluster definition and helpers
Date: Sat, 6 Sep 2025 03:13:48 +0800 [thread overview]
Message-ID: <20250905191357.78298-7-ryncsn@gmail.com> (raw)
In-Reply-To: <20250905191357.78298-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
No functional change. Move the cluster related definitions and helpers to
mm/swap.h, and tidy them up: add a "swap_" prefix to the cluster
lock/unlock helpers and rename offset_to_cluster() to swp_offset_cluster(),
so they can be used outside of the swap files. While at it, add kerneldoc.
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
---
include/linux/swap.h | 34 ----------------
mm/swap.h | 70 ++++++++++++++++++++++++++++++++
mm/swapfile.c | 97 +++++++++++++-------------------------------
3 files changed, 99 insertions(+), 102 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 23452f014ca1..7e1fe4ff3d30 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -235,40 +235,6 @@ enum {
/* Special value in each swap_map continuation */
#define SWAP_CONT_MAX 0x7f /* Max count */
-/*
- * We use this to track usage of a cluster. A cluster is a block of swap disk
- * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
- * free clusters are organized into a list. We fetch an entry from the list to
- * get a free cluster.
- *
- * The flags field determines if a cluster is free. This is
- * protected by cluster lock.
- */
-struct swap_cluster_info {
- spinlock_t lock; /*
- * Protect swap_cluster_info fields
- * other than list, and swap_info_struct->swap_map
- * elements corresponding to the swap cluster.
- */
- u16 count;
- u8 flags;
- u8 order;
- struct list_head list;
-};
-
-/* All on-list cluster must have a non-zero flag. */
-enum swap_cluster_flags {
- CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
- CLUSTER_FLAG_FREE,
- CLUSTER_FLAG_NONFULL,
- CLUSTER_FLAG_FRAG,
- /* Clusters with flags above are allocatable */
- CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
- CLUSTER_FLAG_FULL,
- CLUSTER_FLAG_DISCARD,
- CLUSTER_FLAG_MAX,
-};
-
/*
* The first page in the swap file is the swap header, which is always marked
* bad to prevent it from being allocated as an entry. This also prevents the
diff --git a/mm/swap.h b/mm/swap.h
index a69e18b12b45..39b27337bc0a 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -7,10 +7,80 @@ struct swap_iocb;
extern int page_cluster;
+#ifdef CONFIG_THP_SWAP
+#define SWAPFILE_CLUSTER HPAGE_PMD_NR
+#define swap_entry_order(order) (order)
+#else
+#define SWAPFILE_CLUSTER 256
+#define swap_entry_order(order) 0
+#endif
+
+/*
+ * We use this to track usage of a cluster. A cluster is a block of swap disk
+ * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk. All
+ * free clusters are organized into a list. We fetch an entry from the list to
+ * get a free cluster.
+ *
+ * The flags field determines if a cluster is free. This is
+ * protected by cluster lock.
+ */
+struct swap_cluster_info {
+ spinlock_t lock; /*
+ * Protect swap_cluster_info fields
+ * other than list, and swap_info_struct->swap_map
+ * elements corresponding to the swap cluster.
+ */
+ u16 count;
+ u8 flags;
+ u8 order;
+ struct list_head list;
+};
+
+/* All on-list clusters must have a non-zero flag. */
+enum swap_cluster_flags {
+ CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
+ CLUSTER_FLAG_FREE,
+ CLUSTER_FLAG_NONFULL,
+ CLUSTER_FLAG_FRAG,
+ /* Clusters with flags above are allocatable */
+ CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
+ CLUSTER_FLAG_FULL,
+ CLUSTER_FLAG_DISCARD,
+ CLUSTER_FLAG_MAX,
+};
+
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
+static inline struct swap_cluster_info *swp_offset_cluster(
+ struct swap_info_struct *si, pgoff_t offset)
+{
+ return &si->cluster_info[offset / SWAPFILE_CLUSTER];
+}
+
+/**
+ * swap_cluster_lock - Lock and return the swap cluster of the given offset.
+ * @si: swap device the cluster belongs to.
+ * @offset: the swap entry offset, pointing to a valid slot.
+ *
+ * Context: The caller must ensure the offset is in the valid range and
+ * protect the swap device with reference count or locks.
+ */
+static inline struct swap_cluster_info *swap_cluster_lock(
+ struct swap_info_struct *si, unsigned long offset)
+{
+ struct swap_cluster_info *ci = swp_offset_cluster(si, offset);
+
+ spin_lock(&ci->lock);
+ return ci;
+}
+
+static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
+{
+ spin_unlock(&ci->lock);
+}
+
/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1bd90f17440f..547ad4bfe1d8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -58,9 +58,6 @@ static void swap_entries_free(struct swap_info_struct *si,
static void swap_range_alloc(struct swap_info_struct *si,
unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
-static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
- unsigned long offset);
-static inline void unlock_cluster(struct swap_cluster_info *ci);
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -257,9 +254,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* swap_map is HAS_CACHE only, which means the slots have no page table
* reference or pending writeback, and can't be allocated to others.
*/
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
need_reclaim = swap_only_has_cache(si, offset, nr_pages);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
if (!need_reclaim)
goto out_unlock;
@@ -384,19 +381,6 @@ static void discard_swap_cluster(struct swap_info_struct *si,
}
}
-#ifdef CONFIG_THP_SWAP
-#define SWAPFILE_CLUSTER HPAGE_PMD_NR
-
-#define swap_entry_order(order) (order)
-#else
-#define SWAPFILE_CLUSTER 256
-
-/*
- * Define swap_entry_order() as constant to let compiler to optimize
- * out some code if !CONFIG_THP_SWAP
- */
-#define swap_entry_order(order) 0
-#endif
#define LATENCY_LIMIT 256
static inline bool cluster_is_empty(struct swap_cluster_info *info)
@@ -424,34 +408,12 @@ static inline unsigned int cluster_index(struct swap_info_struct *si,
return ci - si->cluster_info;
}
-static inline struct swap_cluster_info *offset_to_cluster(struct swap_info_struct *si,
- unsigned long offset)
-{
- return &si->cluster_info[offset / SWAPFILE_CLUSTER];
-}
-
static inline unsigned int cluster_offset(struct swap_info_struct *si,
struct swap_cluster_info *ci)
{
return cluster_index(si, ci) * SWAPFILE_CLUSTER;
}
-static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
- unsigned long offset)
-{
- struct swap_cluster_info *ci;
-
- ci = offset_to_cluster(si, offset);
- spin_lock(&ci->lock);
-
- return ci;
-}
-
-static inline void unlock_cluster(struct swap_cluster_info *ci)
-{
- spin_unlock(&ci->lock);
-}
-
static void move_cluster(struct swap_info_struct *si,
struct swap_cluster_info *ci, struct list_head *list,
enum swap_cluster_flags new_flags)
@@ -807,7 +769,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
}
out:
relocate_cluster(si, ci);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
if (si->flags & SWP_SOLIDSTATE) {
this_cpu_write(percpu_swap_cluster.offset[order], next);
this_cpu_write(percpu_swap_cluster.si[order], si);
@@ -874,7 +836,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
if (ci->flags == CLUSTER_FLAG_NONE)
relocate_cluster(si, ci);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
if (to_scan <= 0)
break;
}
@@ -913,7 +875,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
if (offset == SWAP_ENTRY_INVALID)
goto new_cluster;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
/* Cluster could have been used by another order */
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
@@ -921,7 +883,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
found = alloc_swap_scan_cluster(si, ci, offset,
order, usage);
} else {
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
}
if (found)
goto done;
@@ -1202,7 +1164,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
if (!si || !offset || !get_swap_device_info(si))
return false;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
offset = cluster_offset(si, ci);
@@ -1210,7 +1172,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
if (found)
*entry = swp_entry(si->type, found);
} else {
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
}
put_swap_device(si);
@@ -1478,14 +1440,14 @@ static void swap_entries_put_cache(struct swap_info_struct *si,
unsigned long offset = swp_offset(entry);
struct swap_cluster_info *ci;
- ci = lock_cluster(si, offset);
- if (swap_only_has_cache(si, offset, nr))
+ ci = swap_cluster_lock(si, offset);
+ if (swap_only_has_cache(si, offset, nr)) {
swap_entries_free(si, ci, entry, nr);
- else {
+ } else {
for (int i = 0; i < nr; i++, entry.val++)
swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
}
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
}
static bool swap_entries_put_map(struct swap_info_struct *si,
@@ -1503,7 +1465,7 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
if (count != 1 && count != SWAP_MAP_SHMEM)
goto fallback;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
if (!swap_is_last_map(si, offset, nr, &has_cache)) {
goto locked_fallback;
}
@@ -1512,21 +1474,20 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
else
for (i = 0; i < nr; i++)
WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return has_cache;
fallback:
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
locked_fallback:
for (i = 0; i < nr; i++, entry.val++) {
count = swap_entry_put_locked(si, ci, entry, 1);
if (count == SWAP_HAS_CACHE)
has_cache = true;
}
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return has_cache;
-
}
/*
@@ -1576,7 +1537,7 @@ static void swap_entries_free(struct swap_info_struct *si,
unsigned char *map_end = map + nr_pages;
/* It should never free entries across different clusters */
- VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1));
+ VM_BUG_ON(ci != swp_offset_cluster(si, offset + nr_pages - 1));
VM_BUG_ON(cluster_is_empty(ci));
VM_BUG_ON(ci->count < nr_pages);
@@ -1651,9 +1612,9 @@ bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
struct swap_cluster_info *ci;
int count;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
count = swap_count(si->swap_map[offset]);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return !!count;
}
@@ -1676,7 +1637,7 @@ int swp_swapcount(swp_entry_t entry)
offset = swp_offset(entry);
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
count = swap_count(si->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
@@ -1699,7 +1660,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return count;
}
@@ -1714,7 +1675,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
int i;
bool ret = false;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
if (nr_pages == 1) {
if (swap_count(map[roffset]))
ret = true;
@@ -1727,7 +1688,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
}
}
unlock_out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return ret;
}
@@ -2660,8 +2621,8 @@ static void wait_for_allocation(struct swap_info_struct *si)
BUG_ON(si->flags & SWP_WRITEOK);
for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) {
- ci = lock_cluster(si, offset);
- unlock_cluster(ci);
+ ci = swap_cluster_lock(si, offset);
+ swap_cluster_unlock(ci);
}
}
@@ -3577,7 +3538,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
offset = swp_offset(entry);
VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
VM_WARN_ON(usage == 1 && nr > 1);
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
err = 0;
for (i = 0; i < nr; i++) {
@@ -3632,7 +3593,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
}
unlock_out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return err;
}
@@ -3731,7 +3692,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
offset = swp_offset(entry);
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
count = swap_count(si->swap_map[offset]);
@@ -3791,7 +3752,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
out_unlock_cont:
spin_unlock(&si->cont_lock);
out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
put_swap_device(si);
outer:
if (page)
--
2.51.0
next prev parent reply other threads:[~2025-09-05 19:14 UTC|newest]
Thread overview: 80+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-05 19:13 [PATCH v2 00/15] mm, swap: introduce swap table as swap cache (phase I) Kairui Song
2025-09-05 19:13 ` [PATCH v2 01/15] docs/mm: add document for swap table Kairui Song
2025-09-05 23:58 ` Chris Li
2025-09-06 13:31 ` Kairui Song
2025-09-08 12:35 ` Baoquan He
2025-09-08 14:27 ` Kairui Song
2025-09-08 15:06 ` Baoquan He
2025-09-08 15:01 ` Chris Li
2025-09-08 15:09 ` Baoquan He
2025-09-08 15:52 ` Chris Li
2025-09-05 19:13 ` [PATCH v2 02/15] mm, swap: use unified helper for swap cache look up Kairui Song
2025-09-05 23:59 ` Chris Li
2025-09-08 11:43 ` David Hildenbrand
2025-09-05 19:13 ` [PATCH v2 03/15] mm, swap: fix swap cache index error when retrying reclaim Kairui Song
2025-09-05 22:40 ` Nhat Pham
2025-09-06 6:30 ` Kairui Song
2025-09-06 1:51 ` Chris Li
2025-09-06 6:28 ` Kairui Song
2025-09-06 11:58 ` Chris Li
2025-09-08 3:08 ` Baolin Wang
2025-09-08 11:45 ` David Hildenbrand
2025-09-05 19:13 ` [PATCH v2 04/15] mm, swap: check page poison flag after locking it Kairui Song
2025-09-06 2:00 ` Chris Li
2025-09-08 12:11 ` David Hildenbrand
2025-09-09 14:54 ` Kairui Song
2025-09-09 15:18 ` David Hildenbrand
2025-09-05 19:13 ` [PATCH v2 05/15] mm, swap: always lock and check the swap cache folio before use Kairui Song
2025-09-06 2:12 ` Chris Li
2025-09-06 6:32 ` Kairui Song
2025-09-08 12:18 ` David Hildenbrand
2025-09-09 14:58 ` Kairui Song
2025-09-09 15:19 ` David Hildenbrand
2025-09-10 12:56 ` Kairui Song
2025-09-05 19:13 ` Kairui Song [this message]
2025-09-06 2:13 ` [PATCH v2 06/15] mm, swap: rename and move some swap cluster definition and helpers Chris Li
2025-09-08 3:03 ` Baolin Wang
2025-09-05 19:13 ` [PATCH v2 07/15] mm, swap: tidy up swap device and cluster info helpers Kairui Song
2025-09-06 2:14 ` Chris Li
2025-09-08 12:21 ` David Hildenbrand
2025-09-08 15:01 ` Kairui Song
2025-09-05 19:13 ` [PATCH v2 08/15] mm/shmem, swap: remove redundant error handling for replacing folio Kairui Song
2025-09-08 3:17 ` Baolin Wang
2025-09-08 9:28 ` Kairui Song
2025-09-05 19:13 ` [PATCH v2 09/15] mm, swap: cleanup swap cache API and add kerneldoc Kairui Song
2025-09-06 5:45 ` Chris Li
2025-09-08 0:11 ` Barry Song
2025-09-08 3:23 ` Baolin Wang
2025-09-08 12:23 ` David Hildenbrand
2025-09-05 19:13 ` [PATCH v2 10/15] mm, swap: wrap swap cache replacement with a helper Kairui Song
2025-09-06 7:09 ` Chris Li
2025-09-08 3:41 ` Baolin Wang
2025-09-08 10:44 ` Kairui Song
2025-09-09 1:18 ` Baolin Wang
2025-09-08 12:30 ` David Hildenbrand
2025-09-08 14:20 ` Kairui Song
2025-09-08 14:39 ` David Hildenbrand
2025-09-08 14:49 ` Kairui Song
2025-09-05 19:13 ` [PATCH v2 11/15] mm, swap: use the swap table for the swap cache and switch API Kairui Song
2025-09-06 15:28 ` Chris Li
2025-09-08 15:38 ` Kairui Song
2025-09-07 12:55 ` Klara Modin
2025-09-08 14:34 ` Kairui Song
2025-09-08 15:00 ` Klara Modin
2025-09-08 15:10 ` Kairui Song
2025-09-08 13:45 ` David Hildenbrand
2025-09-08 15:14 ` Kairui Song
2025-09-08 15:32 ` Kairui Song
2025-09-10 2:53 ` SeongJae Park
2025-09-10 2:56 ` Kairui Song
2025-09-05 19:13 ` [PATCH v2 12/15] mm, swap: mark swap address space ro and add context debug check Kairui Song
2025-09-06 15:35 ` Chris Li
2025-09-08 13:10 ` David Hildenbrand
2025-09-05 19:13 ` [PATCH v2 13/15] mm, swap: remove contention workaround for swap cache Kairui Song
2025-09-06 15:30 ` Chris Li
2025-09-08 13:12 ` David Hildenbrand
2025-09-05 19:13 ` [PATCH v2 14/15] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-09-06 15:45 ` Chris Li
2025-09-08 14:58 ` Kairui Song
2025-09-05 19:13 ` [PATCH v2 15/15] mm, swap: use a single page for swap table when the size fits Kairui Song
2025-09-06 15:48 ` Chris Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250905191357.78298-7-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=nphamcs@gmail.com \
--cc=shikemeng@huaweicloud.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.