From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>,
Hugh Dickins <hughd@google.com>, Chris Li <chrisl@kernel.org>,
David Hildenbrand <david@redhat.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Baoquan He <bhe@redhat.com>, Barry Song <baohua@kernel.org>,
Kalesh Singh <kaleshsingh@google.com>,
Kemeng Shi <shikemeng@huaweicloud.com>,
Tim Chen <tim.c.chen@linux.intel.com>,
Ryan Roberts <ryan.roberts@arm.com>,
linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 07/28] mm, swap: tidy up swap device and cluster info helpers
Date: Thu, 15 May 2025 04:17:07 +0800 [thread overview]
Message-ID: <20250514201729.48420-8-ryncsn@gmail.com> (raw)
In-Reply-To: <20250514201729.48420-1-ryncsn@gmail.com>
From: Kairui Song <kasong@tencent.com>
swp_swap_info is the common helper used for retrieving swap info in many
places. It has an internal check that may lead to a NULL return value,
but almost none of its caller checks the return value, and the internal
check is pointless. In fact, most of these callers already ensure the
entry is valid during that period and never expect a NULL value.
Tidy this up. If the caller ensures the swap entry / type is valid and
device is pinned, use swp_info / swp_type_info instead, which has more
debug checks and lower overhead as they are inline.
Caller that may expect a NULL value could use swp_get_info /
swp_type_get_info instead.
No feature change as the rearranged codes have no effect since they were
mostly ignored in some way, some new sanity checks are added for debug
built to catch potential misuse.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
include/linux/swap.h | 6 ------
mm/memory.c | 2 +-
mm/page_io.c | 12 ++++++------
mm/swap.h | 32 ++++++++++++++++++++++++++++++--
mm/swap_state.c | 4 ++--
mm/swapfile.c | 35 ++++++++++++++++++-----------------
6 files changed, 57 insertions(+), 34 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e7d9d55c39a..4239852fd203 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -453,7 +453,6 @@ extern sector_t swapdev_block(int, pgoff_t);
extern int __swap_count(swp_entry_t entry);
extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
-struct swap_info_struct *swp_swap_info(swp_entry_t entry);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);
@@ -466,11 +465,6 @@ static inline void put_swap_device(struct swap_info_struct *si)
}
#else /* CONFIG_SWAP */
-static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
-{
- return NULL;
-}
-
static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
{
return NULL;
diff --git a/mm/memory.c b/mm/memory.c
index 254be0e88801..cc1f6891cf99 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4315,7 +4315,7 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
- struct swap_info_struct *si = swp_swap_info(entry);
+ struct swap_info_struct *si = swp_info(entry);
pgoff_t offset = swp_offset(entry);
int i;
diff --git a/mm/page_io.c b/mm/page_io.c
index 4bce19df557b..eaf6319b81ab 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -204,7 +204,7 @@ static bool is_folio_zero_filled(struct folio *folio)
static void swap_zeromap_folio_set(struct folio *folio)
{
struct obj_cgroup *objcg = get_obj_cgroup_from_folio(folio);
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
int nr_pages = folio_nr_pages(folio);
swp_entry_t entry;
unsigned int i;
@@ -223,7 +223,7 @@ static void swap_zeromap_folio_set(struct folio *folio)
static void swap_zeromap_folio_clear(struct folio *folio)
{
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
swp_entry_t entry;
unsigned int i;
@@ -375,7 +375,7 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
static void swap_writepage_fs(struct folio *folio, struct writeback_control *wbc)
{
struct swap_iocb *sio = NULL;
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
struct file *swap_file = sis->swap_file;
loff_t pos = swap_dev_pos(folio->swap);
@@ -452,7 +452,7 @@ static void swap_writepage_bdev_async(struct folio *folio,
void __swap_writepage(struct folio *folio, struct writeback_control *wbc)
{
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
/*
@@ -543,7 +543,7 @@ static bool swap_read_folio_zeromap(struct folio *folio)
static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
{
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
struct swap_iocb *sio = NULL;
loff_t pos = swap_dev_pos(folio->swap);
@@ -614,7 +614,7 @@ static void swap_read_folio_bdev_async(struct folio *folio,
void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
bool synchronous = sis->flags & SWP_SYNCHRONOUS_IO;
bool workingset = folio_test_workingset(folio);
unsigned long pflags;
diff --git a/mm/swap.h b/mm/swap.h
index 38d37d241f1c..4982e6c2ad95 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -13,6 +13,8 @@ extern int page_cluster;
#define swap_entry_order(order) 0
#endif
+extern struct swap_info_struct *swap_info[];
+
/*
* We use this to track usage of a cluster. A cluster is a block of swap disk
* space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
@@ -51,9 +53,29 @@ enum swap_cluster_flags {
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
+/*
+ * All swp_* function callers must ensure the entry is valid, and hold the
+ * swap device reference or pin the device in other ways. E.g, a locked
+ * folio in the swap cache makes sure its entries (folio->swap) are valid
+ * and won't be freed, the device is also pinned by its entries.
+ */
+static inline struct swap_info_struct *swp_type_info(int type)
+{
+ struct swap_info_struct *si;
+ si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
+ VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
+ return si;
+}
+
+static inline struct swap_info_struct *swp_info(swp_entry_t entry)
+{
+ return swp_type_info(swp_type(entry));
+}
+
static inline struct swap_cluster_info *swp_offset_cluster(
struct swap_info_struct *si, pgoff_t offset)
{
+ VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
return &si->cluster_info[offset / SWAPFILE_CLUSTER];
}
@@ -62,6 +84,7 @@ static inline struct swap_cluster_info *swap_lock_cluster(
unsigned long offset)
{
struct swap_cluster_info *ci = swp_offset_cluster(si, offset);
+ VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
spin_lock(&ci->lock);
return ci;
}
@@ -154,7 +177,7 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
static inline unsigned int folio_swap_flags(struct folio *folio)
{
- return swp_swap_info(folio->swap)->flags;
+ return swp_info(folio->swap)->flags;
}
/*
@@ -165,7 +188,7 @@ static inline unsigned int folio_swap_flags(struct folio *folio)
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
bool *is_zeromap)
{
- struct swap_info_struct *sis = swp_swap_info(entry);
+ struct swap_info_struct *sis = swp_info(entry);
unsigned long start = swp_offset(entry);
unsigned long end = start + max_nr;
bool first_bit;
@@ -184,6 +207,11 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
#else /* CONFIG_SWAP */
struct swap_iocb;
+static inline struct swap_info_struct *swp_info(swp_entry_t entry)
+{
+ return NULL;
+}
+
static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 07c41676486a..db9efa64f64e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -330,7 +330,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
bool skip_if_exists)
{
- struct swap_info_struct *si = swp_swap_info(entry);
+ struct swap_info_struct *si = swp_info(entry);
struct folio *folio;
struct folio *new_folio = NULL;
struct folio *result = NULL;
@@ -554,7 +554,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
unsigned long offset = entry_offset;
unsigned long start_offset, end_offset;
unsigned long mask;
- struct swap_info_struct *si = swp_swap_info(entry);
+ struct swap_info_struct *si = swp_info(entry);
struct blk_plug plug;
struct swap_iocb *splug = NULL;
bool page_allocated;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ba3fd99eb5fa..2f834069b7ad 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -102,7 +102,7 @@ static PLIST_HEAD(swap_active_head);
static struct plist_head *swap_avail_heads;
static DEFINE_SPINLOCK(swap_avail_lock);
-static struct swap_info_struct *swap_info[MAX_SWAPFILES];
+struct swap_info_struct *swap_info[MAX_SWAPFILES];
static DEFINE_MUTEX(swapon_mutex);
@@ -124,14 +124,20 @@ static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = {
.lock = INIT_LOCAL_LOCK(),
};
-static struct swap_info_struct *swap_type_to_swap_info(int type)
+/* May return NULL on invalid type, caller must check for NULL return */
+static struct swap_info_struct *swp_type_get_info(int type)
{
if (type >= MAX_SWAPFILES)
return NULL;
-
return READ_ONCE(swap_info[type]); /* rcu_dereference() */
}
+/* May return NULL on invalid entry, caller must check for NULL return */
+static struct swap_info_struct *swp_get_info(swp_entry_t entry)
+{
+ return swp_type_get_info(swp_type(entry));
+}
+
static inline unsigned char swap_count(unsigned char ent)
{
return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */
@@ -343,7 +349,7 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
sector_t swap_folio_sector(struct folio *folio)
{
- struct swap_info_struct *sis = swp_swap_info(folio->swap);
+ struct swap_info_struct *sis = swp_info(folio->swap);
struct swap_extent *se;
sector_t sector;
pgoff_t offset;
@@ -1272,7 +1278,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
if (!entry.val)
goto out;
- si = swp_swap_info(entry);
+ si = swp_get_info(entry);
if (!si)
goto bad_nofile;
if (data_race(!(si->flags & SWP_USED)))
@@ -1381,7 +1387,7 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
if (!entry.val)
goto out;
- si = swp_swap_info(entry);
+ si = swp_get_info(entry);
if (!si)
goto bad_nofile;
if (!get_swap_device_info(si))
@@ -1560,7 +1566,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
int __swap_count(swp_entry_t entry)
{
- struct swap_info_struct *si = swp_swap_info(entry);
+ struct swap_info_struct *si = swp_info(entry);
pgoff_t offset = swp_offset(entry);
return swap_count(si->swap_map[offset]);
@@ -1791,7 +1797,7 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr)
swp_entry_t get_swap_page_of_type(int type)
{
- struct swap_info_struct *si = swap_type_to_swap_info(type);
+ struct swap_info_struct *si = swp_type_get_info(type);
unsigned long offset;
swp_entry_t entry = {0};
@@ -1872,7 +1878,7 @@ int find_first_swap(dev_t *device)
*/
sector_t swapdev_block(int type, pgoff_t offset)
{
- struct swap_info_struct *si = swap_type_to_swap_info(type);
+ struct swap_info_struct *si = swp_type_get_info(type);
struct swap_extent *se;
if (!si || !(si->flags & SWP_WRITEOK))
@@ -2801,7 +2807,7 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
if (!l)
return SEQ_START_TOKEN;
- for (type = 0; (si = swap_type_to_swap_info(type)); type++) {
+ for (type = 0; (si = swp_type_get_info(type)); type++) {
if (!(si->flags & SWP_USED) || !si->swap_map)
continue;
if (!--l)
@@ -2822,7 +2828,7 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
type = si->type + 1;
++(*pos);
- for (; (si = swap_type_to_swap_info(type)); type++) {
+ for (; (si = swp_type_get_info(type)); type++) {
if (!(si->flags & SWP_USED) || !si->swap_map)
continue;
return si;
@@ -3483,7 +3489,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
unsigned char has_cache;
int err, i;
- si = swp_swap_info(entry);
+ si = swp_get_info(entry);
if (WARN_ON_ONCE(!si)) {
pr_err("%s%08lx\n", Bad_file, entry.val);
return -EINVAL;
@@ -3598,11 +3604,6 @@ void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
swap_entries_put_cache(si, entry, nr);
}
-struct swap_info_struct *swp_swap_info(swp_entry_t entry)
-{
- return swap_type_to_swap_info(swp_type(entry));
-}
-
/*
* add_swap_count_continuation - called when a swap count is duplicated
* beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
--
2.49.0
next prev parent reply other threads:[~2025-05-14 20:18 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-14 20:17 [PATCH 00/28] mm, swap: introduce swap table Kairui Song
2025-05-14 20:17 ` [PATCH 01/28] mm, swap: don't scan every fragment cluster Kairui Song
2025-05-14 20:17 ` [PATCH 02/28] mm, swap: consolidate the helper for mincore Kairui Song
2025-05-14 20:17 ` [PATCH 03/28] mm/shmem, swap: remove SWAP_MAP_SHMEM Kairui Song
2025-05-14 20:17 ` [PATCH 04/28] mm, swap: split readahead update out of swap cache lookup Kairui Song
2025-05-14 20:17 ` [PATCH 05/28] mm, swap: sanitize swap cache lookup convention Kairui Song
2025-05-19 4:38 ` Barry Song
2025-05-20 3:31 ` Kairui Song
2025-05-20 4:41 ` Barry Song
2025-05-20 19:09 ` Kairui Song
2025-05-20 22:33 ` Barry Song
2025-05-21 2:45 ` Kairui Song
2025-05-21 3:24 ` Barry Song
2025-05-23 2:29 ` Barry Song
2025-05-23 20:01 ` Kairui Song
2025-05-27 7:58 ` Barry Song
2025-05-27 15:11 ` Kairui Song
2025-05-30 8:49 ` Kairui Song
2025-05-30 19:24 ` Kairui Song
2025-05-14 20:17 ` [PATCH 06/28] mm, swap: rearrange swap cluster definition and helpers Kairui Song
2025-05-19 6:26 ` Barry Song
2025-05-20 3:50 ` Kairui Song
2025-05-14 20:17 ` Kairui Song [this message]
2025-05-14 20:17 ` [PATCH 08/28] mm, swap: use swap table for the swap cache and switch API Kairui Song
2025-05-14 20:17 ` [PATCH 09/28] mm/swap: rename __read_swap_cache_async to __swapin_cache_alloc Kairui Song
2025-05-14 20:17 ` [PATCH 10/28] mm, swap: add a swap helper for bypassing only read ahead Kairui Song
2025-05-14 20:17 ` [PATCH 11/28] mm, swap: clean up and consolidate helper for mTHP swapin check Kairui Song
2025-05-15 9:31 ` Klara Modin
2025-05-15 9:39 ` Kairui Song
2025-05-19 7:08 ` Barry Song
2025-05-19 11:09 ` Kairui Song
2025-05-19 11:57 ` Barry Song
2025-05-14 20:17 ` [PATCH 12/28] mm, swap: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 13/28] mm/shmem, swap: avoid redundant Xarray lookup during swapin Kairui Song
2025-05-14 20:17 ` [PATCH 14/28] mm/shmem: never bypass the swap cache for SWP_SYNCHRONOUS_IO Kairui Song
2025-05-14 20:17 ` [PATCH 15/28] mm, swap: split locked entry freeing into a standalone helper Kairui Song
2025-05-14 20:17 ` [PATCH 16/28] mm, swap: use swap cache as the swap in synchronize layer Kairui Song
2025-05-14 20:17 ` [PATCH 17/28] mm, swap: sanitize swap entry management workflow Kairui Song
2025-05-14 20:17 ` [PATCH 18/28] mm, swap: rename and introduce folio_free_swap_cache Kairui Song
2025-05-14 20:17 ` [PATCH 19/28] mm, swap: clean up and improve swap entries batch freeing Kairui Song
2025-05-14 20:17 ` [PATCH 20/28] mm, swap: check swap table directly for checking cache Kairui Song
2025-06-19 10:38 ` Baoquan He
2025-06-19 10:50 ` Kairui Song
2025-06-20 8:04 ` Baoquan He
2025-05-14 20:17 ` [PATCH 21/28] mm, swap: add folio to swap cache directly on allocation Kairui Song
2025-05-14 20:17 ` [PATCH 22/28] mm, swap: drop the SWAP_HAS_CACHE flag Kairui Song
2025-05-14 20:17 ` [PATCH 23/28] mm, swap: remove no longer needed _swap_info_get Kairui Song
2025-05-14 20:17 ` [PATCH 24/28] mm, swap: implement helpers for reserving data in swap table Kairui Song
2025-05-15 9:40 ` Klara Modin
2025-05-16 2:35 ` Kairui Song
2025-05-14 20:17 ` [PATCH 25/28] mm/workingset: leave highest 8 bits empty for anon shadow Kairui Song
2025-05-14 20:17 ` [PATCH 26/28] mm, swap: minor clean up for swapon Kairui Song
2025-05-14 20:17 ` [PATCH 27/28] mm, swap: use swap table to track swap count Kairui Song
2025-05-14 20:17 ` [PATCH 28/28] mm, swap: implement dynamic allocation of swap table Kairui Song
2025-05-21 18:36 ` Nhat Pham
2025-05-22 4:13 ` Kairui Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250514201729.48420-8-ryncsn@gmail.com \
--to=ryncsn@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=bhe@redhat.com \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=ryan.roberts@arm.com \
--cc=shikemeng@huaweicloud.com \
--cc=tim.c.chen@linux.intel.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).