From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
Barry Song <baohua@kernel.org>, Baoquan He <bhe@redhat.com>,
Nhat Pham <nphamcs@gmail.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Ying Huang <ying.huang@linux.alibaba.com>,
Johannes Weiner <hannes@cmpxchg.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 3/9] mm, swap: rename and move some swap cluster definition and helpers
Date: Sat, 23 Aug 2025 03:20:17 +0800 [thread overview]
Message-ID: <20250822192023.13477-4-ryncsn@gmail.com> (raw)
In-Reply-To: <20250822192023.13477-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
No functional change. Move the cluster-related definitions and helpers to
mm/swap.h. Also rename offset_to_cluster() to swp_offset_cluster() and add a
"swap_" prefix to the cluster lock/unlock helpers, so they can be used outside of swapfile.c.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
include/linux/swap.h | 34 ---------------
mm/swap.h | 63 ++++++++++++++++++++++++++++
mm/swapfile.c | 99 ++++++++++++++------------------------------
3 files changed, 93 insertions(+), 103 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c2da85cb7fe7..20efd9a34034 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -235,40 +235,6 @@ enum {
/* Special value in each swap_map continuation */
#define SWAP_CONT_MAX 0x7f /* Max count */
-/*
- * We use this to track usage of a cluster. A cluster is a block of swap disk
- * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
- * free clusters are organized into a list. We fetch an entry from the list to
- * get a free cluster.
- *
- * The flags field determines if a cluster is free. This is
- * protected by cluster lock.
- */
-struct swap_cluster_info {
- spinlock_t lock; /*
- * Protect swap_cluster_info fields
- * other than list, and swap_info_struct->swap_map
- * elements corresponding to the swap cluster.
- */
- u16 count;
- u8 flags;
- u8 order;
- struct list_head list;
-};
-
-/* All on-list cluster must have a non-zero flag. */
-enum swap_cluster_flags {
- CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
- CLUSTER_FLAG_FREE,
- CLUSTER_FLAG_NONFULL,
- CLUSTER_FLAG_FRAG,
- /* Clusters with flags above are allocatable */
- CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
- CLUSTER_FLAG_FULL,
- CLUSTER_FLAG_DISCARD,
- CLUSTER_FLAG_MAX,
-};
-
/*
* The first page in the swap file is the swap header, which is always marked
* bad to prevent it from being allocated as an entry. This also prevents the
diff --git a/mm/swap.h b/mm/swap.h
index bb2adbfd64a9..223b40f2d37e 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -7,10 +7,73 @@ struct swap_iocb;
extern int page_cluster;
+#ifdef CONFIG_THP_SWAP
+#define SWAPFILE_CLUSTER HPAGE_PMD_NR
+#define swap_entry_order(order) (order)
+#else
+#define SWAPFILE_CLUSTER 256
+#define swap_entry_order(order) 0
+#endif
+
+/*
+ * We use this to track usage of a cluster. A cluster is a block of swap disk
+ * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
+ * free clusters are organized into a list. We fetch an entry from the list to
+ * get a free cluster.
+ *
+ * The flags field determines if a cluster is free. This is
+ * protected by cluster lock.
+ */
+struct swap_cluster_info {
+ spinlock_t lock; /*
+ * Protect swap_cluster_info fields
+ * other than list, and swap_info_struct->swap_map
+ * elements corresponding to the swap cluster.
+ */
+ u16 count;
+ u8 flags;
+ u8 order;
+ struct list_head list;
+};
+
+/* All on-list cluster must have a non-zero flag. */
+enum swap_cluster_flags {
+ CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
+ CLUSTER_FLAG_FREE,
+ CLUSTER_FLAG_NONFULL,
+ CLUSTER_FLAG_FRAG,
+ /* Clusters with flags above are allocatable */
+ CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
+ CLUSTER_FLAG_FULL,
+ CLUSTER_FLAG_DISCARD,
+ CLUSTER_FLAG_MAX,
+};
+
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
+static inline struct swap_cluster_info *swp_offset_cluster(
+ struct swap_info_struct *si, pgoff_t offset)
+{
+ return &si->cluster_info[offset / SWAPFILE_CLUSTER];
+}
+
+static inline struct swap_cluster_info *swap_cluster_lock(
+ struct swap_info_struct *si,
+ unsigned long offset)
+{
+ struct swap_cluster_info *ci = swp_offset_cluster(si, offset);
+
+ spin_lock(&ci->lock);
+ return ci;
+}
+
+static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
+{
+ spin_unlock(&ci->lock);
+}
+
/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 12f2580ebe8d..618cf4333a3d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -58,9 +58,6 @@ static void swap_entries_free(struct swap_info_struct *si,
static void swap_range_alloc(struct swap_info_struct *si,
unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
-static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
- unsigned long offset);
-static inline void unlock_cluster(struct swap_cluster_info *ci);
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -259,9 +256,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* swap_map is HAS_CACHE only, which means the slots have no page table
* reference or pending writeback, and can't be allocated to others.
*/
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
need_reclaim = swap_only_has_cache(si, offset, nr_pages);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
if (!need_reclaim)
goto out_unlock;
@@ -386,20 +383,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
}
}
-#ifdef CONFIG_THP_SWAP
-#define SWAPFILE_CLUSTER HPAGE_PMD_NR
-
-#define swap_entry_order(order) (order)
-#else
-#define SWAPFILE_CLUSTER 256
-
-/*
- * Define swap_entry_order() as constant to let compiler to optimize
- * out some code if !CONFIG_THP_SWAP
- */
-#define swap_entry_order(order) 0
-#endif
-#define LATENCY_LIMIT 256
+#define LATENCY_LIMIT 256
static inline bool cluster_is_empty(struct swap_cluster_info *info)
{
@@ -426,34 +410,12 @@ static inline unsigned int cluster_index(struct swap_info_struct *si,
return ci - si->cluster_info;
}
-static inline struct swap_cluster_info *offset_to_cluster(struct swap_info_struct *si,
- unsigned long offset)
-{
- return &si->cluster_info[offset / SWAPFILE_CLUSTER];
-}
-
static inline unsigned int cluster_offset(struct swap_info_struct *si,
struct swap_cluster_info *ci)
{
return cluster_index(si, ci) * SWAPFILE_CLUSTER;
}
-static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
- unsigned long offset)
-{
- struct swap_cluster_info *ci;
-
- ci = offset_to_cluster(si, offset);
- spin_lock(&ci->lock);
-
- return ci;
-}
-
-static inline void unlock_cluster(struct swap_cluster_info *ci)
-{
- spin_unlock(&ci->lock);
-}
-
static void move_cluster(struct swap_info_struct *si,
struct swap_cluster_info *ci, struct list_head *list,
enum swap_cluster_flags new_flags)
@@ -809,7 +771,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
}
out:
relocate_cluster(si, ci);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
if (si->flags & SWP_SOLIDSTATE) {
this_cpu_write(percpu_swap_cluster.offset[order], next);
this_cpu_write(percpu_swap_cluster.si[order], si);
@@ -876,7 +838,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
if (ci->flags == CLUSTER_FLAG_NONE)
relocate_cluster(si, ci);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
if (to_scan <= 0)
break;
}
@@ -915,7 +877,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
if (offset == SWAP_ENTRY_INVALID)
goto new_cluster;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
/* Cluster could have been used by another order */
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
@@ -923,7 +885,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
found = alloc_swap_scan_cluster(si, ci, offset,
order, usage);
} else {
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
}
if (found)
goto done;
@@ -1204,7 +1166,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
if (!si || !offset || !get_swap_device_info(si))
return false;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
offset = cluster_offset(si, ci);
@@ -1212,7 +1174,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
if (found)
*entry = swp_entry(si->type, found);
} else {
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
}
put_swap_device(si);
@@ -1480,14 +1442,14 @@ static void swap_entries_put_cache(struct swap_info_struct *si,
unsigned long offset = swp_offset(entry);
struct swap_cluster_info *ci;
- ci = lock_cluster(si, offset);
- if (swap_only_has_cache(si, offset, nr))
+ ci = swap_cluster_lock(si, offset);
+ if (swap_only_has_cache(si, offset, nr)) {
swap_entries_free(si, ci, entry, nr);
- else {
+ } else {
for (int i = 0; i < nr; i++, entry.val++)
swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
}
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
}
static bool swap_entries_put_map(struct swap_info_struct *si,
@@ -1505,7 +1467,7 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
if (count != 1 && count != SWAP_MAP_SHMEM)
goto fallback;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
if (!swap_is_last_map(si, offset, nr, &has_cache)) {
goto locked_fallback;
}
@@ -1514,21 +1476,20 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
else
for (i = 0; i < nr; i++)
WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return has_cache;
fallback:
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
locked_fallback:
for (i = 0; i < nr; i++, entry.val++) {
count = swap_entry_put_locked(si, ci, entry, 1);
if (count == SWAP_HAS_CACHE)
has_cache = true;
}
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return has_cache;
-
}
/*
@@ -1578,7 +1539,7 @@ static void swap_entries_free(struct swap_info_struct *si,
unsigned char *map_end = map + nr_pages;
/* It should never free entries across different clusters */
- VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1));
+ VM_BUG_ON(ci != swp_offset_cluster(si, offset + nr_pages - 1));
VM_BUG_ON(cluster_is_empty(ci));
VM_BUG_ON(ci->count < nr_pages);
@@ -1653,9 +1614,9 @@ bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
struct swap_cluster_info *ci;
int count;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
count = swap_count(si->swap_map[offset]);
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return !!count;
}
@@ -1678,7 +1639,7 @@ int swp_swapcount(swp_entry_t entry)
offset = swp_offset(entry);
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
count = swap_count(si->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
@@ -1701,7 +1662,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return count;
}
@@ -1716,7 +1677,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
int i;
bool ret = false;
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
if (nr_pages == 1) {
if (swap_count(map[roffset]))
ret = true;
@@ -1729,7 +1690,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
}
}
unlock_out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return ret;
}
@@ -2662,8 +2623,8 @@ static void wait_for_allocation(struct swap_info_struct *si)
BUG_ON(si->flags & SWP_WRITEOK);
for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) {
- ci = lock_cluster(si, offset);
- unlock_cluster(ci);
+ ci = swap_cluster_lock(si, offset);
+ swap_cluster_unlock(ci);
}
}
@@ -3579,7 +3540,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
offset = swp_offset(entry);
VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
VM_WARN_ON(usage == 1 && nr > 1);
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
err = 0;
for (i = 0; i < nr; i++) {
@@ -3634,7 +3595,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
}
unlock_out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
return err;
}
@@ -3733,7 +3694,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
offset = swp_offset(entry);
- ci = lock_cluster(si, offset);
+ ci = swap_cluster_lock(si, offset);
count = swap_count(si->swap_map[offset]);
@@ -3793,7 +3754,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
out_unlock_cont:
spin_unlock(&si->cont_lock);
out:
- unlock_cluster(ci);
+ swap_cluster_unlock(ci);
put_swap_device(si);
outer:
if (page)
--
2.51.0
next prev parent reply other threads:[~2025-08-22 19:21 UTC|newest]
Thread overview: 97+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-22 19:20 [PATCH 0/9] mm, swap: introduce swap table as swap cache (phase I) Kairui Song
2025-08-22 19:20 ` [PATCH 1/9] mm, swap: use unified helper for swap cache look up Kairui Song
2025-08-27 2:47 ` Chris Li
2025-08-27 3:50 ` Chris Li
2025-08-27 13:45 ` Kairui Song
2025-08-27 3:52 ` Baoquan He
2025-08-27 13:46 ` Kairui Song
2025-08-28 3:20 ` Baolin Wang
2025-09-01 23:50 ` Barry Song
2025-09-02 6:12 ` Kairui Song
2025-09-02 6:52 ` Chris Li
2025-09-02 10:06 ` David Hildenbrand
2025-09-02 12:32 ` Chris Li
2025-09-02 13:18 ` David Hildenbrand
2025-09-02 16:38 ` Kairui Song
2025-09-02 10:10 ` David Hildenbrand
2025-09-02 17:13 ` Kairui Song
2025-09-03 8:00 ` David Hildenbrand
2025-09-03 17:41 ` Nhat Pham
2025-09-04 16:05 ` Kairui Song
2025-08-22 19:20 ` [PATCH 2/9] mm, swap: always lock and check the swap cache folio before use Kairui Song
2025-08-27 6:13 ` Chris Li
2025-08-27 13:44 ` Kairui Song
2025-08-30 1:42 ` Chris Li
2025-08-27 7:03 ` Chris Li
2025-08-27 14:35 ` Kairui Song
2025-08-28 3:41 ` Baolin Wang
2025-08-28 18:05 ` Kairui Song
2025-08-30 1:53 ` Chris Li
2025-08-30 15:15 ` Kairui Song
2025-08-30 17:17 ` Chris Li
2025-09-01 18:17 ` Kairui Song
2025-09-01 21:10 ` Chris Li
2025-09-02 5:40 ` Barry Song
2025-09-02 10:18 ` David Hildenbrand
2025-09-02 10:21 ` David Hildenbrand
2025-09-02 12:46 ` Chris Li
2025-09-02 13:27 ` Kairui Song
2025-08-22 19:20 ` Kairui Song [this message]
2025-08-30 2:31 ` [PATCH 3/9] mm, swap: rename and move some swap cluster definition and helpers Chris Li
2025-09-02 5:53 ` Barry Song
2025-09-02 10:20 ` David Hildenbrand
2025-09-02 12:50 ` Chris Li
2025-08-22 19:20 ` [PATCH 4/9] mm, swap: tidy up swap device and cluster info helpers Kairui Song
2025-08-27 3:47 ` Baoquan He
2025-08-27 17:44 ` Chris Li
2025-08-27 23:46 ` Baoquan He
2025-08-30 2:38 ` Chris Li
2025-09-02 6:01 ` Barry Song
2025-09-03 9:28 ` David Hildenbrand
2025-09-02 6:02 ` Barry Song
2025-09-02 13:33 ` David Hildenbrand
2025-09-02 15:03 ` Kairui Song
2025-09-03 8:11 ` David Hildenbrand
2025-08-22 19:20 ` [PATCH 5/9] mm/shmem, swap: remove redundant error handling for replacing folio Kairui Song
2025-08-25 3:02 ` Baolin Wang
2025-08-25 9:45 ` Kairui Song
2025-08-30 2:41 ` Chris Li
2025-09-03 8:25 ` David Hildenbrand
2025-08-22 19:20 ` [PATCH 6/9] mm, swap: use the swap table for the swap cache and switch API Kairui Song
2025-08-30 1:54 ` Baoquan He
2025-08-30 3:40 ` Chris Li
2025-08-30 3:34 ` Chris Li
2025-08-30 16:52 ` Kairui Song
2025-08-31 1:00 ` Chris Li
2025-09-02 11:51 ` Kairui Song
2025-09-02 9:55 ` Barry Song
2025-09-02 11:58 ` Kairui Song
2025-09-02 23:44 ` Barry Song
2025-09-03 2:12 ` Kairui Song
2025-09-03 2:31 ` Barry Song
2025-09-03 11:41 ` David Hildenbrand
2025-09-03 12:54 ` Kairui Song
2025-09-04 9:28 ` David Hildenbrand
2025-08-22 19:20 ` [PATCH 7/9] mm, swap: remove contention workaround for swap cache Kairui Song
2025-08-30 4:07 ` Chris Li
2025-08-30 15:24 ` Kairui Song
2025-08-31 15:54 ` Kairui Song
2025-08-31 20:06 ` Chris Li
2025-08-31 20:04 ` Chris Li
2025-09-02 10:06 ` Barry Song
2025-08-22 19:20 ` [PATCH 8/9] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-08-30 4:17 ` Chris Li
2025-09-02 11:15 ` Barry Song
2025-09-02 13:17 ` Chris Li
2025-09-02 16:57 ` Kairui Song
2025-09-02 23:31 ` Barry Song
2025-09-03 2:13 ` Kairui Song
2025-09-03 12:35 ` Chris Li
2025-09-03 20:52 ` Barry Song
2025-09-04 6:50 ` Chris Li
2025-08-22 19:20 ` [PATCH 9/9] mm, swap: use a single page for swap table when the size fits Kairui Song
2025-08-30 4:23 ` Chris Li
2025-08-26 22:00 ` [PATCH 0/9] mm, swap: introduce swap table as swap cache (phase I) Chris Li
2025-08-30 5:44 ` Chris Li
2025-09-04 16:36 ` Kairui Song
2025-09-04 18:50 ` Chris Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250822192023.13477-4-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=nphamcs@gmail.com \
--cc=shikemeng@huaweicloud.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.