Linux Documentation
 help / color / mirror / Atom feed
* Re: [PATCH v3 2/3] Documentation: security-bugs: explain what is and is not a security bug
From: Greg KH @ 2026-05-12  5:46 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Willy Tarreau, Leon Romanovsky, skhan, security, workflows,
	linux-doc, linux-kernel
In-Reply-To: <87a4u5u195.fsf@trenco.lwn.net>

On Mon, May 11, 2026 at 02:42:14PM -0600, Jonathan Corbet wrote:
> Willy Tarreau <w@1wt.eu> writes:
> 
> >> I can ship stuff Linusward quickly too... :)  But it's fine if Greg
> >> takes it, of course.
> >
> > Oh that's fine then. I thought you only delivered such updates into next
> > releases. I'm fine with either way of course! Let's pick the path of
> > least effort for each.
> 
> That's my normal procedure, since there are few docs changes that have
> greater urgency, but I do have a "fixes" branch.
> 
> Greg, what's your preference?  Unless I hear otherwise, I guess I'll
> apply it shortly.

Please apply it and take it through your tree, thanks!

greg k-h

^ permalink raw reply

* [PATCH 12/12] swap: move swap_info_struct to mm/swap.h
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

swap_info_struct is now internal to the MM subsystem, so remove it from
the public header.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/swap.h | 98 +-------------------------------------------
 mm/swap.h            | 92 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 96 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 95237ee065c2..31eef9b74949 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -16,9 +16,9 @@
 #include <uapi/linux/mempolicy.h>
 #include <asm/page.h>
 
-struct notifier_block;
-
 struct bio;
+struct notifier_block;
+struct swap_info_struct;
 
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
@@ -178,29 +178,6 @@ struct sysinfo;
 struct writeback_control;
 struct zone;
 
-/*
- * Max bad pages in the new format..
- */
-#define MAX_SWAP_BADPAGES \
-	((offsetof(union swap_header, magic.magic) - \
-	  offsetof(union swap_header, info.badpages)) / sizeof(int))
-
-enum {
-	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
-	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
-	SWP_DISCARDABLE = (1 << 2),	/* blkdev support discard */
-	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
-	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
-	SWP_BLKDEV	= (1 << 6),	/* its a block device */
-	SWP_ACTIVATED	= (1 << 7),	/* set after swap_activate success */
-	SWP_FS_OPS	= (1 << 8),	/* swapfile operations go through fs */
-	SWP_AREA_DISCARD = (1 << 9),	/* single-time swap area discards */
-	SWP_PAGE_DISCARD = (1 << 10),	/* freed swap page-cluster discards */
-	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
-	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
-					/* add others here before... */
-};
-
 #define SWAP_CLUSTER_MAX 32UL
 #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
@@ -219,56 +196,6 @@ enum {
 #define SWAP_NR_ORDERS		1
 #endif
 
-/*
- * We keep using same cluster for rotational device so IO will be sequential.
- * The purpose is to optimize SWAP throughput on these device.
- */
-struct swap_sequential_cluster {
-	unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
-};
-
-/*
- * The in-memory structure used to track swap areas.
- */
-struct swap_info_struct {
-	struct percpu_ref users;	/* indicate and keep swap device valid. */
-	unsigned long	flags;		/* SWP_USED etc: see above */
-	signed short	prio;		/* swap priority of this type */
-	struct plist_node list;		/* entry in swap_active_head */
-	signed char	type;		/* strange name for an index */
-	unsigned int	max;		/* size of this swap device */
-	unsigned long *zeromap;		/* kvmalloc'ed bitmap to track zero pages */
-	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
-	struct list_head free_clusters; /* free clusters list */
-	struct list_head full_clusters; /* full clusters list */
-	struct list_head nonfull_clusters[SWAP_NR_ORDERS];
-					/* list of cluster that contains at least one free slot */
-	struct list_head frag_clusters[SWAP_NR_ORDERS];
-					/* list of cluster that are fragmented or contented */
-	unsigned int pages;		/* total of usable pages of swap */
-	atomic_long_t inuse_pages;	/* number of those currently in use */
-	struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */
-	spinlock_t global_cluster_lock;	/* Serialize usage of global cluster */
-	struct rb_root swap_extent_root;/* root of the swap extent rbtree */
-	struct block_device *bdev;	/* swap device or bdev of swap file */
-	struct file *swap_file;		/* seldom referenced */
-	struct completion comp;		/* seldom referenced */
-	spinlock_t lock;		/*
-					 * protect map scan related fields like
-					 * inuse_pages and all cluster lists.
-					 * Other fields are only changed
-					 * at swapon/swapoff, so are protected
-					 * by swap_lock. changing flags need
-					 * hold this lock and swap_lock. If
-					 * both locks need hold, hold swap_lock
-					 * first.
-					 */
-	struct work_struct discard_work; /* discard worker */
-	struct work_struct reclaim_work; /* reclaim worker */
-	struct list_head discard_clusters; /* discard clusters list */
-	struct plist_node avail_list;   /* entry in swap_avail_head */
-};
-
 static inline swp_entry_t page_swap_entry(struct page *page)
 {
 	struct folio *folio = page_folio(page);
@@ -423,10 +350,7 @@ int find_first_swap(dev_t *device);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t swapdev_block(int, pgoff_t);
 extern int __swap_count(swp_entry_t entry);
-extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry);
 extern int swp_swapcount(swp_entry_t entry);
-struct backing_dev_info;
-extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
 sector_t swap_folio_sector(struct folio *folio);
 
 /*
@@ -452,20 +376,7 @@ bool folio_free_swap(struct folio *folio);
 swp_entry_t swap_alloc_hibernation_slot(int type);
 void swap_free_hibernation_slot(swp_entry_t entry);
 
-static inline void put_swap_device(struct swap_info_struct *si)
-{
-	percpu_ref_put(&si->users);
-}
-
 #else /* CONFIG_SWAP */
-static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
-{
-	return NULL;
-}
-
-static inline void put_swap_device(struct swap_info_struct *si)
-{
-}
 
 #define get_nr_swap_pages()			0L
 #define total_swap_pages			0L
@@ -497,11 +408,6 @@ static inline int __swap_count(swp_entry_t entry)
 	return 0;
 }
 
-static inline bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
-{
-	return false;
-}
-
 static inline int swp_swapcount(swp_entry_t entry)
 {
 	return 0;
diff --git a/mm/swap.h b/mm/swap.h
index a77016f2423b..70974495bf15 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -8,6 +8,79 @@ struct swap_iocb;
 
 extern int page_cluster;
 
+/*
+ * We keep using same cluster for rotational device so IO will be sequential.
+ * The purpose is to optimize SWAP throughput on these device.
+ */
+struct swap_sequential_cluster {
+	unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
+};
+
+/*
+ * The in-memory structure used to track swap areas.
+ */
+struct swap_info_struct {
+	struct percpu_ref users;	/* indicate and keep swap device valid. */
+	unsigned long	flags;		/* SWP_USED etc: see above */
+	signed short	prio;		/* swap priority of this type */
+	struct plist_node list;		/* entry in swap_active_head */
+	signed char	type;		/* strange name for an index */
+	unsigned int	max;		/* size of this swap device */
+	unsigned long *zeromap;		/* kvmalloc'ed bitmap to track zero pages */
+	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
+	struct list_head free_clusters; /* free clusters list */
+	struct list_head full_clusters; /* full clusters list */
+	struct list_head nonfull_clusters[SWAP_NR_ORDERS];
+					/* list of cluster that contains at least one free slot */
+	struct list_head frag_clusters[SWAP_NR_ORDERS];
+					/* list of cluster that are fragmented or contented */
+	unsigned int pages;		/* total of usable pages of swap */
+	atomic_long_t inuse_pages;	/* number of those currently in use */
+	struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */
+	spinlock_t global_cluster_lock;	/* Serialize usage of global cluster */
+	struct rb_root swap_extent_root;/* root of the swap extent rbtree */
+	struct block_device *bdev;	/* swap device or bdev of swap file */
+	struct file *swap_file;		/* seldom referenced */
+	struct completion comp;		/* seldom referenced */
+	spinlock_t lock;		/*
+					 * protect map scan related fields like
+					 * inuse_pages and all cluster lists.
+					 * Other fields are only changed
+					 * at swapon/swapoff, so are protected
+					 * by swap_lock. changing flags need
+					 * hold this lock and swap_lock. If
+					 * both locks need hold, hold swap_lock
+					 * first.
+					 */
+	struct work_struct discard_work; /* discard worker */
+	struct work_struct reclaim_work; /* reclaim worker */
+	struct list_head discard_clusters; /* discard clusters list */
+	struct plist_node avail_list;   /* entry in swap_avail_head */
+};
+
+/*
+ * Max bad pages in the new format..
+ */
+#define MAX_SWAP_BADPAGES \
+	((offsetof(union swap_header, magic.magic) - \
+	  offsetof(union swap_header, info.badpages)) / sizeof(int))
+
+enum {
+	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
+	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
+	SWP_DISCARDABLE = (1 << 2),	/* blkdev support discard */
+	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
+	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
+	SWP_BLKDEV	= (1 << 6),	/* its a block device */
+	SWP_ACTIVATED	= (1 << 7),	/* set after swap_activate success */
+	SWP_FS_OPS	= (1 << 8),	/* swapfile operations go through fs */
+	SWP_AREA_DISCARD = (1 << 9),	/* single-time swap area discards */
+	SWP_PAGE_DISCARD = (1 << 10),	/* freed swap page-cluster discards */
+	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
+	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
+					/* add others here before... */
+};
+
 #ifdef CONFIG_THP_SWAP
 #define SWAPFILE_CLUSTER	HPAGE_PMD_NR
 #define swap_entry_order(order)	(order)
@@ -352,6 +425,13 @@ static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
 	return i;
 }
 
+bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry);
+struct swap_info_struct *get_swap_device(swp_entry_t entry);
+static inline void put_swap_device(struct swap_info_struct *si)
+{
+	percpu_ref_put(&si->users);
+}
+
 #else /* CONFIG_SWAP */
 struct swap_iocb;
 static inline struct swap_cluster_info *swap_cluster_lock(
@@ -498,5 +578,17 @@ static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
 {
 	return 0;
 }
+static inline bool swap_entry_swapped(struct swap_info_struct *si,
+		swp_entry_t entry)
+{
+	return false;
+}
+static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
+{
+	return NULL;
+}
+static inline void put_swap_device(struct swap_info_struct *si)
+{
+}
 #endif /* CONFIG_SWAP */
 #endif /* _MM_SWAP_H */
-- 
2.53.0


^ permalink raw reply related

* [PATCH 11/12] swap: move struct swap_extent to swapfile.c
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

struct swap_extent is only used inside of mm/swapfile.c, so move it
there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/swap.h | 15 ---------------
 mm/swapfile.c        | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 916889738f08..95237ee065c2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -178,21 +178,6 @@ struct sysinfo;
 struct writeback_control;
 struct zone;
 
-/*
- * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
- * disk blocks.  A rbtree of swap extents maps the entire swapfile (Where the
- * term `swapfile' refers to either a blockdevice or an IS_REG file). Apart
- * from setup, they're handled identically.
- *
- * We always assume that blocks are of size PAGE_SIZE.
- */
-struct swap_extent {
-	struct rb_node rb_node;
-	pgoff_t start_page;
-	pgoff_t nr_pages;
-	sector_t start_block;
-};
-
 /*
  * Max bad pages in the new format..
  */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 26852c2ad36e..c0479533f9ef 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -260,6 +260,21 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
 	return ret;
 }
 
+/*
+ * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
+ * disk blocks.  A rbtree of swap extents maps the entire swapfile (Where the
+ * term `swapfile' refers to either a blockdevice or an IS_REG file). Apart
+ * from setup, they're handled identically.
+ *
+ * We always assume that blocks are of size PAGE_SIZE.
+ */
+struct swap_extent {
+	struct rb_node rb_node;
+	pgoff_t start_page;
+	pgoff_t nr_pages;
+	sector_t start_block;
+};
+
 static inline struct swap_extent *first_se(struct swap_info_struct *sis)
 {
 	struct rb_node *rb = rb_first(&sis->swap_extent_root);
-- 
2.53.0


^ permalink raw reply related

* [PATCH 10/12] swap: add a swap_activate_fs_ops helper
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Add a helper that abstracts away the low-level details of enabling
fs_ops-based swapping.  This prepares for making swap_info_struct
private.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfs/file.c        | 4 +---
 fs/smb/client/file.c | 3 +--
 include/linux/swap.h | 5 +++++
 mm/swapfile.c        | 7 +++++++
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 10ab2a923835..ce4d860c4e7a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -588,7 +588,7 @@ int nfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 	ret = rpc_clnt_swap_activate(clnt);
 	if (ret)
 		return ret;
-	ret = add_swap_extent(sis, sis->max, NULL, 0);
+	ret = swap_activate_fs_ops(sis);
 	if (ret < 0) {
 		rpc_clnt_swap_deactivate(clnt);
 		return ret;
@@ -596,8 +596,6 @@ int nfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 
 	if (cl->rpc_ops->enable_swap)
 		cl->rpc_ops->enable_swap(inode);
-
-	sis->flags |= SWP_FS_OPS;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_swap_activate);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index e1bbc65ce7f3..e11065be1e64 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -3326,8 +3326,7 @@ int cifs_swap_activate(struct file *swap_file, struct swap_info_struct *sis)
 	 * from reading or writing the file
 	 */
 
-	sis->flags |= SWP_FS_OPS;
-	return add_swap_extent(sis, sis->max, NULL, 0);
+	return swap_activate_fs_ops(sis);
 }
 
 void cifs_swap_deactivate(struct file *file)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1cbb67ddd8e..916889738f08 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -406,6 +406,7 @@ extern void __meminit kswapd_stop(int nid);
 int add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
 		struct block_device *bdev, sector_t start_block);
 int generic_swap_activate(struct file *swap_file, struct swap_info_struct *sis);
+int swap_activate_fs_ops(struct swap_info_struct *sis);
 
 static inline unsigned long total_swapcache_pages(void)
 {
@@ -532,6 +533,10 @@ static inline int add_swap_extent(struct swap_info_struct *sis,
 {
 	return -EINVAL;
 }
+static inline int swap_activate_fs_ops(struct swap_info_struct *sis)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_SWAP */
 #ifdef CONFIG_MEMCG
 static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2c9d2af736c4..26852c2ad36e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2757,6 +2757,13 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
 }
 EXPORT_SYMBOL_GPL(add_swap_extent);
 
+int swap_activate_fs_ops(struct swap_info_struct *sis)
+{
+	sis->flags |= SWP_FS_OPS;
+	return add_swap_extent(sis, sis->max, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(swap_activate_fs_ops);
+
 /*
  * A `swap extent' is a simple thing which maps a contiguous range of pages
  * onto a contiguous range of disk blocks.  A rbtree of swap extents is
-- 
2.53.0


^ permalink raw reply related

* [PATCH 09/12] swap: push down setting sis->bdev into ->swap_activate
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Only the ->swap_activate file operation knows which block device we'll
swap to.  So push setting sis->bdev (now done in add_swap_extent) and
the zoned block device check down into ->swap_activate, and set
SWP_BLKDEV directly in swapon.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/fops.c         |  9 ++++++++-
 fs/btrfs/inode.c     |  7 ++++---
 fs/f2fs/data.c       |  3 ++-
 fs/iomap/swapfile.c  |  7 ++-----
 fs/nfs/file.c        |  2 +-
 fs/smb/client/file.c |  2 +-
 fs/xfs/xfs_file.c    |  6 ------
 include/linux/swap.h |  4 ++--
 mm/page_io.c         |  3 +--
 mm/swapfile.c        | 38 ++++++++++++--------------------------
 10 files changed, 33 insertions(+), 48 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index 067e46299666..da09ce3f072f 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -951,9 +951,16 @@ static int blkdev_mmap_prepare(struct vm_area_desc *desc)
 
 static int blkdev_swap_activate(struct file *file, struct swap_info_struct *sis)
 {
+	struct block_device *bdev = I_BDEV(file->f_mapping->host);
 	loff_t isize = i_size_read(bdev_file_inode(file));
 
-	return add_swap_extent(sis, div_u64(isize, PAGE_SIZE), 0);
+	/*
+	 * The swap code performs arbitrary overwrites, which are not supported
+	 * on zones with sequential write constraints.
+	 */
+	if (bdev_is_zoned(bdev))
+		return -EINVAL;
+	return add_swap_extent(sis, div_u64(isize, PAGE_SIZE), bdev, 0);
 }
 
 const struct file_operations def_blk_fops = {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ee0a7947706a..84003c520530 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10201,6 +10201,7 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
 }
 
 struct btrfs_swap_info {
+	struct btrfs_device *device;
 	u64 start;
 	u64 block_start;
 	u64 block_len;
@@ -10214,7 +10215,8 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
 	first_ppage = PAGE_ALIGN(bsi->block_start) >> PAGE_SHIFT;
 	next_ppage = PAGE_ALIGN_DOWN(bsi->block_start + bsi->block_len) >> PAGE_SHIFT;
 
-	return add_swap_extent(sis, next_ppage - first_ppage, first_ppage);
+	return add_swap_extent(sis, next_ppage - first_ppage, bsi->device->bdev,
+			first_ppage);
 }
 
 void btrfs_swap_deactivate(struct file *file)
@@ -10503,6 +10505,7 @@ int btrfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 			bsi.start = key.offset;
 			bsi.block_start = physical_block_start;
 			bsi.block_len = len;
+			bsi.device = device;
 		}
 
 		if (fatal_signal_pending(current)) {
@@ -10533,8 +10536,6 @@ int btrfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 	up_write(&BTRFS_I(inode)->i_mmap_lock);
 	btrfs_free_backref_share_ctx(backref_ctx);
 	btrfs_free_path(path);
-	if (!ret && device)
-		sis->bdev = device->bdev;
 	return ret;
 }
 #endif
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8bcf630df557..8d116ff517c9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4326,7 +4326,8 @@ static int check_swap_activate(struct swap_info_struct *sis,
 		/*
 		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
 		 */
-		ret = add_swap_extent(sis, nr_pblocks, pblock);
+		ret = add_swap_extent(sis, nr_pblocks, inode->i_sb->s_bdev,
+				pblock);
 		if (ret < 0)
 			goto out;
 		cur_lblock += nr_pblocks;
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index a4e0ca462cc4..862b4c02a8bd 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -50,10 +50,6 @@ static int iomap_swapfile_iter(struct iomap_iter *iter, struct file *file,
 	if (iomap->flags & IOMAP_F_SHARED)
 		return iomap_swapfile_fail(file, "has shared extents");
 
-	/* Only one bdev per swap file. */
-	if (iomap->bdev != sis->bdev)
-		return iomap_swapfile_fail(file, "outside the main device");
-
 	/*
 	 * Round the start up and the end down so that the physical extent
 	 * aligns to a page boundary.
@@ -61,7 +57,8 @@ static int iomap_swapfile_iter(struct iomap_iter *iter, struct file *file,
 	first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
 	next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
 			PAGE_SHIFT;
-	error = add_swap_extent(sis, next_ppage - first_ppage, first_ppage);
+	error = add_swap_extent(sis, next_ppage - first_ppage, iomap->bdev,
+			first_ppage);
 	if (error)
 		return error;
 	return iomap_iter_advance_full(iter);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2bc55d9d71e1..10ab2a923835 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -588,7 +588,7 @@ int nfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 	ret = rpc_clnt_swap_activate(clnt);
 	if (ret)
 		return ret;
-	ret = add_swap_extent(sis, sis->max, 0);
+	ret = add_swap_extent(sis, sis->max, NULL, 0);
 	if (ret < 0) {
 		rpc_clnt_swap_deactivate(clnt);
 		return ret;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 84459f87907e..e1bbc65ce7f3 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -3327,7 +3327,7 @@ int cifs_swap_activate(struct file *swap_file, struct swap_info_struct *sis)
 	 */
 
 	sis->flags |= SWP_FS_OPS;
-	return add_swap_extent(sis, sis->max, 0);
+	return add_swap_extent(sis, sis->max, NULL, 0);
 }
 
 void cifs_swap_deactivate(struct file *file)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 41f7e19bd31f..74128ebf7161 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -2116,12 +2116,6 @@ xfs_file_swap_activate(
 	 */
 	xfs_inodegc_flush(ip->i_mount);
 
-	/*
-	 * Direct the swap code to the correct block device when this file
-	 * sits on the RT device.
-	 */
-	sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
-
 	return iomap_swap_activate(file, sis, &xfs_read_iomap_ops);
 }
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 657779485ae4..b1cbb67ddd8e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -404,7 +404,7 @@ extern void __meminit kswapd_stop(int nid);
 #ifdef CONFIG_SWAP
 
 int add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
-		sector_t start_block);
+		struct block_device *bdev, sector_t start_block);
 int generic_swap_activate(struct file *swap_file, struct swap_info_struct *sis);
 
 static inline unsigned long total_swapcache_pages(void)
@@ -528,7 +528,7 @@ static inline bool folio_free_swap(struct folio *folio)
 
 static inline int add_swap_extent(struct swap_info_struct *sis,
 		unsigned long start_page, unsigned long nr_pages,
-		sector_t start_block)
+		struct block_device *bdev, sector_t start_block)
 {
 	return -EINVAL;
 }
diff --git a/mm/page_io.c b/mm/page_io.c
index 3e1c12649448..2ab8994ed1c2 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -132,7 +132,7 @@ int generic_swap_activate(struct file *swap_file, struct swap_info_struct *sis)
 		/*
 		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
 		 */
-		ret = add_swap_extent(sis, 1,
+		ret = add_swap_extent(sis, 1, inode->i_sb->s_bdev,
 				first_block >> (PAGE_SHIFT - blkbits));
 		if (ret < 0)
 			return ret;
@@ -141,7 +141,6 @@ int generic_swap_activate(struct file *swap_file, struct swap_info_struct *sis)
 		continue;
 	}
 	return 0;
-
 bad_bmap:
 	pr_err("swapon: swapfile has holes\n");
 	return -EINVAL;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fbf11c8c5c69..2c9d2af736c4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2707,7 +2707,7 @@ static void destroy_swap_extents(struct swap_info_struct *sis,
  */
 int
 add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
-		sector_t start_block)
+		struct block_device *bdev, sector_t start_block)
 {
 	struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL;
 	struct swap_extent *se;
@@ -2718,6 +2718,12 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
 		return 0;
 	nr_pages = min(nr_pages, sis->max - sis->pages);
 
+	/* Only one bdev per swap file for now. */
+	if (!sis->bdev)
+		sis->bdev = bdev;
+	else if (bdev != sis->bdev)
+		return -EINVAL;
+
 	/*
 	 * place the new node at the right most since the
 	 * function is called in ascending page order.
@@ -2793,6 +2799,8 @@ static int setup_swap_extents(struct swap_info_struct *sis,
 	sis->flags |= SWP_ACTIVATED;
 	if (sis->flags & SWP_FS_OPS)
 		error = sio_pool_init();
+	else if (WARN_ON_ONCE(!sis->bdev))
+		error = -EINVAL;
 	if (error)
 		destroy_swap_extents(sis, swap_file);
 	return error;
@@ -3224,26 +3232,6 @@ static struct swap_info_struct *alloc_swap_info(void)
 	return p;
 }
 
-static int claim_swapfile(struct swap_info_struct *si, struct inode *inode)
-{
-	if (S_ISBLK(inode->i_mode)) {
-		si->bdev = I_BDEV(inode);
-		/*
-		 * Zoned block devices contain zones that have a sequential
-		 * write only restriction.  Hence zoned block devices are not
-		 * suitable for swapping.  Disallow them here.
-		 */
-		if (bdev_is_zoned(si->bdev))
-			return -EINVAL;
-		si->flags |= SWP_BLKDEV;
-	} else if (S_ISREG(inode->i_mode)) {
-		si->bdev = inode->i_sb->s_bdev;
-	}
-
-	return 0;
-}
-
-
 /*
  * Find out how many pages are allowed for a single swap device. There
  * are two limiting factors:
@@ -3500,16 +3488,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	dentry = swap_file->f_path.dentry;
 	inode = mapping->host;
 
-	error = claim_swapfile(si, inode);
-	if (unlikely(error))
-		goto bad_swap;
-
 	inode_lock(inode);
 	if (d_unlinked(dentry) || cant_mount(dentry)) {
 		error = -ENOENT;
 		goto bad_swap_unlock_inode;
 	}
-	if (!S_ISBLK(inode->i_mode) && !S_ISREG(inode->i_mode)) {
+	if (S_ISBLK(inode->i_mode)) {
+		si->flags |= SWP_BLKDEV;
+	} else if (!S_ISREG(inode->i_mode)) {
 		error = -EINVAL;
 		goto bad_swap_unlock_inode;
 	}
-- 
2.53.0


^ permalink raw reply related

* [PATCH 08/12] swap,iomap: simplify iomap_swapfile_iter
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

add_swap_extent already coalesces multiple extents, so there is no need
to duplicate that in the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/swapfile.c | 104 +++++++++++++-------------------------------
 1 file changed, 31 insertions(+), 73 deletions(-)

diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index cf354fdfb7c3..a4e0ca462cc4 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -6,57 +6,32 @@
 #include <linux/iomap.h>
 #include <linux/swap.h>
 
-/* Swapfile activation */
-
-struct iomap_swapfile_info {
-	struct iomap iomap;		/* accumulated iomap */
-	struct swap_info_struct *sis;
-	unsigned long nr_pages;		/* number of pages collected */
-	struct file *file;
-};
-
-/*
- * Collect physical extents for this swap file.  Physical extents reported to
- * the swap code must be trimmed to align to a page boundary.  The logical
- * offset within the file is irrelevant since the swapfile code maps logical
- * page numbers of the swap device to the physical page-aligned extents.
- */
-static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
-{
-	struct iomap *iomap = &isi->iomap;
-	uint64_t first_ppage;
-	uint64_t next_ppage;
-
-	/*
-	 * Round the start up and the end down so that the physical
-	 * extent aligns to a page boundary.
-	 */
-	first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
-	next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
-			PAGE_SHIFT;
-	return add_swap_extent(isi->sis, next_ppage - first_ppage, first_ppage);
-}
-
-static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str)
+static int iomap_swapfile_fail(struct file *file, const char *str)
 {
 	char *buf, *p = ERR_PTR(-ENOMEM);
 
 	buf = kmalloc(PATH_MAX, GFP_KERNEL);
 	if (buf)
-		p = file_path(isi->file, buf, PATH_MAX);
+		p = file_path(file, buf, PATH_MAX);
 	pr_err("swapon: file %s %s\n", IS_ERR(p) ? "<unknown>" : p, str);
 	kfree(buf);
 	return -EINVAL;
 }
 
 /*
- * Accumulate iomaps for this swap file.  We have to accumulate iomaps because
- * swap only cares about contiguous page-aligned physical extents and makes no
- * distinction between written and unwritten extents.
+ * Report physical extents for this swap file.  Physical extents reported to the
+ * swap code must be trimmed to align to a page boundary.  The logical offset
+ * within the file is irrelevant since the swapfile code maps logical page
+ * numbers of the swap device to the physical page-aligned extents.
  */
-static int iomap_swapfile_iter(struct iomap_iter *iter,
-		struct iomap *iomap, struct iomap_swapfile_info *isi)
+static int iomap_swapfile_iter(struct iomap_iter *iter, struct file *file,
+		struct swap_info_struct *sis)
 {
+	struct iomap *iomap = &iter->iomap;
+	uint64_t first_ppage;
+	uint64_t next_ppage;
+	int error;
+
 	switch (iomap->type) {
 	case IOMAP_MAPPED:
 	case IOMAP_UNWRITTEN:
@@ -64,35 +39,31 @@ static int iomap_swapfile_iter(struct iomap_iter *iter,
 		break;
 	case IOMAP_INLINE:
 		/* No inline data. */
-		return iomap_swapfile_fail(isi, "is inline");
+		return iomap_swapfile_fail(file, "is inline");
 	default:
-		return iomap_swapfile_fail(isi, "has unallocated extents");
+		return iomap_swapfile_fail(file, "has unallocated extents");
 	}
 
 	/* No uncommitted metadata or shared blocks. */
 	if (iomap->flags & IOMAP_F_DIRTY)
-		return iomap_swapfile_fail(isi, "is not committed");
+		return iomap_swapfile_fail(file, "is not committed");
 	if (iomap->flags & IOMAP_F_SHARED)
-		return iomap_swapfile_fail(isi, "has shared extents");
+		return iomap_swapfile_fail(file, "has shared extents");
 
 	/* Only one bdev per swap file. */
-	if (iomap->bdev != isi->sis->bdev)
-		return iomap_swapfile_fail(isi, "outside the main device");
-
-	if (isi->iomap.length == 0) {
-		/* No accumulated extent, so just store it. */
-		memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
-	} else if (isi->iomap.addr + isi->iomap.length == iomap->addr) {
-		/* Append this to the accumulated extent. */
-		isi->iomap.length += iomap->length;
-	} else {
-		/* Otherwise, add the retained iomap and store this one. */
-		int error = iomap_swapfile_add_extent(isi);
-		if (error)
-			return error;
-		memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
-	}
+	if (iomap->bdev != sis->bdev)
+		return iomap_swapfile_fail(file, "outside the main device");
 
+	/*
+	 * Round the start up and the end down so that the physical extent
+	 * aligns to a page boundary.
+	 */
+	first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
+	next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
+			PAGE_SHIFT;
+	error = add_swap_extent(sis, next_ppage - first_ppage, first_ppage);
+	if (error)
+		return error;
 	return iomap_iter_advance_full(iter);
 }
 
@@ -110,10 +81,6 @@ int iomap_swap_activate(struct file *file, struct swap_info_struct *sis,
 		.len	= ALIGN_DOWN(i_size_read(inode), PAGE_SIZE),
 		.flags	= IOMAP_REPORT,
 	};
-	struct iomap_swapfile_info isi = {
-		.sis = sis,
-		.file = file,
-	};
 	int ret;
 
 	/*
@@ -125,16 +92,7 @@ int iomap_swap_activate(struct file *file, struct swap_info_struct *sis,
 		return ret;
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.status = iomap_swapfile_iter(&iter, &iter.iomap, &isi);
-	if (ret < 0)
-		return ret;
-
-	if (isi.iomap.length) {
-		ret = iomap_swapfile_add_extent(&isi);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+		iter.status = iomap_swapfile_iter(&iter, file, sis);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_swap_activate);
-- 
2.53.0


^ permalink raw reply related

* [PATCH 07/12] swap,block: limit swap file size to device size
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Don't blindly pass the value from the swap header to add_swap_extent,
but instead pass the device size rounded down to page granularity.  This
activates the sanity checking in the core code that catches a too large
value in the swap header.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/fops.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/fops.c b/block/fops.c
index 453141801684..067e46299666 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -951,7 +951,9 @@ static int blkdev_mmap_prepare(struct vm_area_desc *desc)
 
 static int blkdev_swap_activate(struct file *file, struct swap_info_struct *sis)
 {
-	return add_swap_extent(sis, sis->max, 0);
+	loff_t isize = i_size_read(bdev_file_inode(file));
+
+	return add_swap_extent(sis, div_u64(isize, PAGE_SIZE), 0);
 }
 
 const struct file_operations def_blk_fops = {
-- 
2.53.0


^ permalink raw reply related

* [PATCH 06/12] swap,block: move the block device swapon code into block/fops.c
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Make use of the abstractions we have.  This is a preparation for
moving more special casing down into block/.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/fops.c  | 6 ++++++
 mm/swapfile.c | 5 -----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index bb6642b45937..453141801684 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -949,6 +949,11 @@ static int blkdev_mmap_prepare(struct vm_area_desc *desc)
 	return generic_file_mmap_prepare(desc);
 }
 
+static int blkdev_swap_activate(struct file *file, struct swap_info_struct *sis)
+{
+	return add_swap_extent(sis, sis->max, 0);
+}
+
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_release,
@@ -965,6 +970,7 @@ const struct file_operations def_blk_fops = {
 	.splice_read	= filemap_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= blkdev_fallocate,
+	.swap_activate	= blkdev_swap_activate,
 	.uring_cmd	= blkdev_uring_cmd,
 	.fop_flags	= FOP_BUFFER_RASYNC,
 };
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1b7fc03612f4..fbf11c8c5c69 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2781,13 +2781,8 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
 static int setup_swap_extents(struct swap_info_struct *sis,
 			      struct file *swap_file)
 {
-	struct address_space *mapping = swap_file->f_mapping;
-	struct inode *inode = mapping->host;
 	int ret, error = 0;
 
-	if (S_ISBLK(inode->i_mode))
-		return add_swap_extent(sis, sis->max, 0);
-
 	if (swap_file->f_op->swap_activate)
 		ret = swap_file->f_op->swap_activate(swap_file, sis);
 	else
-- 
2.53.0


^ permalink raw reply related

* [PATCH 05/12] swap: cleanup setup_swap_extents
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Reflow setup_swap_extents so that the flag checking is not conditional on
a swap_activate method.  This is currently a no-op because the swapoff
code still checks for the presence of a swap_deactivate method, but it
simplifies adding a new check, and also makes the SWP_ACTIVATED flag
more consistent.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/swapfile.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 651c1b59ff9f..1b7fc03612f4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2783,25 +2783,24 @@ static int setup_swap_extents(struct swap_info_struct *sis,
 {
 	struct address_space *mapping = swap_file->f_mapping;
 	struct inode *inode = mapping->host;
-	int ret;
+	int ret, error = 0;
 
 	if (S_ISBLK(inode->i_mode))
 		return add_swap_extent(sis, sis->max, 0);
 
-	if (swap_file->f_op->swap_activate) {
+	if (swap_file->f_op->swap_activate)
 		ret = swap_file->f_op->swap_activate(swap_file, sis);
-		if (ret < 0)
-			return ret;
-		sis->flags |= SWP_ACTIVATED;
-		if ((sis->flags & SWP_FS_OPS) &&
-		    sio_pool_init() != 0) {
-			destroy_swap_extents(sis, swap_file);
-			return -ENOMEM;
-		}
+	else
+		ret = generic_swap_activate(swap_file, sis);
+	if (ret < 0)
 		return ret;
-	}
 
-	return generic_swap_activate(swap_file, sis);
+	sis->flags |= SWP_ACTIVATED;
+	if (sis->flags & SWP_FS_OPS)
+		error = sio_pool_init();
+	if (error)
+		destroy_swap_extents(sis, swap_file);
+	return error;
 }
 
 static void _enable_swap_info(struct swap_info_struct *si)
-- 
2.53.0


^ permalink raw reply related

* [PATCH 04/12] swap: restrict to regular files or block devices
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Various swap code assumes it runs either on a block device or on a
regular file.  Make this restriction explicit using checks right
after opening the file.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/swapfile.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index a183c9c95695..651c1b59ff9f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3515,6 +3515,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		error = -ENOENT;
 		goto bad_swap_unlock_inode;
 	}
+	if (!S_ISBLK(inode->i_mode) && !S_ISREG(inode->i_mode)) {
+		error = -EINVAL;
+		goto bad_swap_unlock_inode;
+	}
 	if (IS_SWAPFILE(inode)) {
 		error = -EBUSY;
 		goto bad_swap_unlock_inode;
-- 
2.53.0


^ permalink raw reply related

* [PATCH 03/12] swap,fs: move swapfile operations to struct file_operations
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

The swap operations have nothing to do with the address_space, which is
used for pagecache operations.  Move them to struct file_operations
instead.  This will allow moving the block device special cases into
block/fops.c subsequently.

Pass struct file first to ->swap_activate, as file operations typically
get the file or iocb as the first argument, and use swap_activate instead
of swapfile_activate in all names to be consistent.

Note that while the trivial iomap wrappers are moved to a new file when
applicable to keep them local to the file operation instances, complex
implementations are kept in their existing place.  It might be worth
moving them in follow-on patches if the maintainers so desire.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 .../filesystems/iomap/operations.rst          |  3 +-
 Documentation/filesystems/locking.rst         | 35 +++++++-------
 Documentation/filesystems/vfs.rst             | 40 ++++++++--------
 fs/btrfs/btrfs_inode.h                        |  3 ++
 fs/btrfs/file.c                               |  4 ++
 fs/btrfs/inode.c                              | 15 +-----
 fs/ext4/file.c                                |  6 +++
 fs/ext4/inode.c                               | 10 ----
 fs/f2fs/data.c                                | 15 +-----
 fs/f2fs/f2fs.h                                |  2 +
 fs/f2fs/file.c                                |  4 ++
 fs/iomap/swapfile.c                           | 12 ++---
 fs/nfs/direct.c                               |  1 +
 fs/nfs/file.c                                 | 12 +++--
 fs/nfs/nfs4file.c                             |  3 ++
 fs/ntfs/aops.c                                |  7 ---
 fs/ntfs/file.c                                |  6 +++
 fs/smb/client/cifsfs.c                        | 18 ++++++++
 fs/smb/client/cifsfs.h                        |  3 ++
 fs/smb/client/file.c                          | 12 ++---
 fs/xfs/xfs_aops.c                             | 46 -------------------
 fs/xfs/xfs_file.c                             | 45 ++++++++++++++++++
 fs/zonefs/file.c                              | 29 ++++++------
 include/linux/fs.h                            | 10 ++--
 include/linux/iomap.h                         |  6 +--
 include/linux/nfs_fs.h                        |  3 ++
 include/linux/swap.h                          |  2 +-
 mm/page_io.c                                  |  9 ++--
 mm/swapfile.c                                 | 12 ++---
 29 files changed, 187 insertions(+), 186 deletions(-)

diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst
index da982ca7e413..2a78037665b7 100644
--- a/Documentation/filesystems/iomap/operations.rst
+++ b/Documentation/filesystems/iomap/operations.rst
@@ -55,7 +55,6 @@ The following address space operations can be wrapped easily:
  * ``readahead``
  * ``writepages``
  * ``bmap``
- * ``swap_activate``
 
 ``struct iomap_write_ops``
 --------------------------
@@ -747,7 +746,7 @@ function.
 Swap File Activation
 ====================
 
-The ``iomap_swapfile_activate`` function finds all the base-page aligned
+The ``iomap_swap_activate`` function finds all the base-page aligned
 regions in a file and sets them up as swap space.
 The file will be ``fsync()``'d before activation.
 ``IOMAP_REPORT`` will be passed as the ``flags`` argument to
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index f3658204d070..e79d72a12273 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -264,9 +264,6 @@ prototypes::
 	int (*launder_folio)(struct folio *);
 	bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
 	int (*error_remove_folio)(struct address_space *, struct folio *);
-	int (*swap_activate)(struct swap_info_struct *sis, struct file *f)
-	int (*swap_deactivate)(struct file *);
-	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 
 locking rules:
 	All except dirty_folio and free_folio may block
@@ -289,9 +286,6 @@ migrate_folio:		yes (both)
 launder_folio:		yes
 is_partially_uptodate:	yes
 error_remove_folio:	yes
-swap_activate:		no
-swap_deactivate:	no
-swap_rw:		yes, unlocks
 ======================	======================== =========	===============
 
 ->write_begin(), ->write_end() and ->read_folio() may be called from
@@ -350,19 +344,6 @@ cleaned, or an error value if not. Note that in order to prevent the folio
 getting mapped back in and redirtied, it needs to be kept locked
 across the entire operation.
 
-->swap_activate() will be called to prepare the given file for swap.  It
-should perform any validation and preparation necessary to ensure that
-writes can be performed with minimal memory allocation.  It should call
-add_swap_extent(), or the helper iomap_swapfile_activate(), and return
-the number of extents added.  If IO should be submitted through
-->swap_rw(), it should set SWP_FS_OPS, otherwise IO will be submitted
-directly to the block device ``sis->bdev``.
-
-->swap_deactivate() will be called in the sys_swapoff()
-path after ->swap_activate() returned success.
-
-->swap_rw will be called for swap IO if SWP_FS_OPS was set by ->swap_activate().
-
 file_lock_operations
 ====================
 
@@ -503,6 +484,9 @@ prototypes::
 			struct file *file_out, loff_t pos_out,
 			loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
+	int (*swap_activate)(struct file *file, struct swap_info_struct *sis);
+	int (*swap_deactivate)(struct file *);
+	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 
 locking rules:
 	All may block.
@@ -555,6 +539,19 @@ used. To block changes to file contents via a memory mapping during the
 operation, the filesystem must take mapping->invalidate_lock to coordinate
 with ->page_mkwrite.
 
+->swap_activate() is called to prepare the given file for swap.  It should
+perform any validation and preparation necessary to ensure that writes can be
+performed with minimal memory allocation.  It should call add_swap_extent(),
+or the helper iomap_swap_activate(), and return the number of extents added.
+If IO should be submitted through ->swap_rw(), the file system must set
+SWP_FS_OPS from ->swap_activate(), otherwise IO will be submitted directly to
+the block device ``sis->bdev``.
+
+->swap_deactivate() is called from the swapoff path to disable a swapfile
+successfully activated using ->swap_activate().
+
+->swap_rw will be called for swap IO if SWP_FS_OPS was set by ->swap_activate().
+
 dquot_operations
 ================
 
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 4092b2149a5d..1624c1ee82d6 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -774,9 +774,6 @@ cache in your filesystem.  The following members are defined:
 					       size_t count);
 		void (*is_dirty_writeback)(struct folio *, bool *, bool *);
 		int (*error_remove_folio)(struct mapping *mapping, struct folio *);
-		int (*swap_activate)(struct swap_info_struct *sis, struct file *f);
-		int (*swap_deactivate)(struct file *);
-		int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 	};
 
 ``read_folio``
@@ -970,23 +967,6 @@ cache in your filesystem.  The following members are defined:
 	Setting this implies you deal with pages going away under you,
 	unless you have them locked or reference counts increased.
 
-``swap_activate``
-
-	Called to prepare the given file for swap.  It should perform
-	any validation and preparation necessary to ensure that writes
-	can be performed with minimal memory allocation.  It should call
-	add_swap_extent(), or the helper iomap_swapfile_activate(), and
-	return the number of extents added.  If IO should be submitted
-	through ->swap_rw(), it should set SWP_FS_OPS, otherwise IO will
-	be submitted directly to the block device ``sis->bdev``.
-
-``swap_deactivate``
-	Called during swapoff on files where swap_activate was
-	successful.
-
-``swap_rw``
-	Called to read or write swap pages when SWP_FS_OPS is set.
-
 The File Object
 ===============
 
@@ -1046,6 +1026,9 @@ This describes how the VFS can manipulate an open file.  As of kernel
 		int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
 					unsigned int poll_flags);
 		int (*mmap_prepare)(struct vm_area_desc *);
+		int (*swap_activate)(struct file *file, struct swap_info_struct *sis);
+		int (*swap_deactivate)(struct file *);
+		int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 	};
 
 Again, all methods are called without any locks being held, unless
@@ -1175,6 +1158,23 @@ otherwise noted.
 	this can be specified by the vm_area_desc->action field and related
 	parameters.
 
+``swap_activate``
+
+	Called to prepare the given file for swap.  It should perform
+	any validation and preparation necessary to ensure that writes
+	can be performed with minimal memory allocation.  It should call
+	add_swap_extent(), or the helper iomap_swap_activate(), and
+	return the number of extents added.  If IO should be submitted
+	through ->swap_rw(), it should set SWP_FS_OPS, otherwise IO will
+	be submitted directly to the block device ``sis->bdev``.
+
+``swap_deactivate``
+	Called during swapoff on files where swap_activate was
+	successful.
+
+``swap_rw``
+	Called to read or write swap pages when SWP_FS_OPS is set.
+
 Note that the file operations are implemented by the specific
 filesystem in which the inode resides.  When opening a device node
 (character or block special) most filesystems will call special
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 55c272fe5d92..f527126882d6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -670,4 +670,7 @@ struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
 				      const struct btrfs_file_extent *file_extent,
 				      int type);
 
+int btrfs_swap_activate(struct file *file, struct swap_info_struct *sis);
+void btrfs_swap_deactivate(struct file *file);
+
 #endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index cf1cb5c4db75..165b8da1d7db 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3867,6 +3867,10 @@ const struct file_operations btrfs_file_operations = {
 	.uring_cmd	= btrfs_uring_cmd,
 	.fop_flags	= FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
 	.setlease	= generic_setlease,
+#ifdef CONFIG_SWAP
+	.swap_activate	= btrfs_swap_activate,
+	.swap_deactivate = btrfs_swap_deactivate,
+#endif
 };
 
 int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 198d87e6f19a..ee0a7947706a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10217,7 +10217,7 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
 	return add_swap_extent(sis, next_ppage - first_ppage, first_ppage);
 }
 
-static void btrfs_swap_deactivate(struct file *file)
+void btrfs_swap_deactivate(struct file *file)
 {
 	struct inode *inode = file_inode(file);
 
@@ -10225,7 +10225,7 @@ static void btrfs_swap_deactivate(struct file *file)
 	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
 }
 
-static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file)
+int btrfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 {
 	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -10537,15 +10537,6 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file)
 		sis->bdev = device->bdev;
 	return ret;
 }
-#else
-static void btrfs_swap_deactivate(struct file *file)
-{
-}
-
-static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file)
-{
-	return -EOPNOTSUPP;
-}
 #endif
 
 /*
@@ -10692,8 +10683,6 @@ static const struct address_space_operations btrfs_aops = {
 	.migrate_folio	= btrfs_migrate_folio,
 	.dirty_folio	= filemap_dirty_folio,
 	.error_remove_folio = generic_error_remove_folio,
-	.swap_activate	= btrfs_swap_activate,
-	.swap_deactivate = btrfs_swap_deactivate,
 };
 
 static const struct inode_operations btrfs_file_inode_operations = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index eb1a323962b1..fad3ed05c02a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -971,6 +971,11 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 	return vfs_setpos(file, offset, maxbytes);
 }
 
+static int ext4_swap_activate(struct file *file, struct swap_info_struct *sis)
+{
+	return iomap_swap_activate(file, sis, &ext4_iomap_report_ops);
+}
+
 const struct file_operations ext4_file_operations = {
 	.llseek		= ext4_llseek,
 	.read_iter	= ext4_file_read_iter,
@@ -992,6 +997,7 @@ const struct file_operations ext4_file_operations = {
 			  FOP_DIO_PARALLEL_WRITE |
 			  FOP_DONTCACHE,
 	.setlease	= generic_setlease,
+	.swap_activate	= ext4_swap_activate,
 };
 
 const struct inode_operations ext4_file_inode_operations = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ca7bac4a8b4a..efbb2ddad363 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3939,12 +3939,6 @@ static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio)
 	return block_dirty_folio(mapping, folio);
 }
 
-static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
-				    struct file *file)
-{
-	return iomap_swapfile_activate(sis, file, &ext4_iomap_report_ops);
-}
-
 static const struct address_space_operations ext4_aops = {
 	.read_folio		= ext4_read_folio,
 	.readahead		= ext4_readahead,
@@ -3958,7 +3952,6 @@ static const struct address_space_operations ext4_aops = {
 	.migrate_folio		= buffer_migrate_folio,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_folio	= generic_error_remove_folio,
-	.swap_activate		= ext4_iomap_swap_activate,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
@@ -3974,7 +3967,6 @@ static const struct address_space_operations ext4_journalled_aops = {
 	.migrate_folio		= buffer_migrate_folio_norefs,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_folio	= generic_error_remove_folio,
-	.swap_activate		= ext4_iomap_swap_activate,
 };
 
 static const struct address_space_operations ext4_da_aops = {
@@ -3990,14 +3982,12 @@ static const struct address_space_operations ext4_da_aops = {
 	.migrate_folio		= buffer_migrate_folio,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_folio	= generic_error_remove_folio,
-	.swap_activate		= ext4_iomap_swap_activate,
 };
 
 static const struct address_space_operations ext4_dax_aops = {
 	.writepages		= ext4_dax_writepages,
 	.dirty_folio		= noop_dirty_folio,
 	.bmap			= ext4_bmap,
-	.swap_activate		= ext4_iomap_swap_activate,
 };
 
 void ext4_set_aops(struct inode *inode)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 86fabacc67e6..8bcf630df557 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4338,7 +4338,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	return ret;
 }
 
-static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file)
+int f2fs_swap_activate(struct file *file, struct swap_info_struct *sis)
 {
 	struct inode *inode = file_inode(file);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -4378,22 +4378,13 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file)
 	return 0;
 }
 
-static void f2fs_swap_deactivate(struct file *file)
+void f2fs_swap_deactivate(struct file *file)
 {
 	struct inode *inode = file_inode(file);
 
 	stat_dec_swapfile_inode(inode);
 	clear_inode_flag(inode, FI_PIN_FILE);
 }
-#else
-static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file)
-{
-	return -EOPNOTSUPP;
-}
-
-static void f2fs_swap_deactivate(struct file *file)
-{
-}
 #endif
 
 const struct address_space_operations f2fs_dblock_aops = {
@@ -4407,8 +4398,6 @@ const struct address_space_operations f2fs_dblock_aops = {
 	.invalidate_folio = f2fs_invalidate_folio,
 	.release_folio	= f2fs_release_folio,
 	.bmap		= f2fs_bmap,
-	.swap_activate  = f2fs_swap_activate,
-	.swap_deactivate = f2fs_swap_deactivate,
 };
 
 void f2fs_clear_page_cache_dirty_tag(struct folio *folio)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 91f506e7c9cf..93e9709f26fa 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4195,6 +4195,8 @@ int f2fs_init_post_read_processing(void);
 void f2fs_destroy_post_read_processing(void);
 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+int f2fs_swap_activate(struct file *file, struct swap_info_struct *sis);
+void f2fs_swap_deactivate(struct file *file);
 extern const struct iomap_ops f2fs_iomap_ops;
 
 /*
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fb12c5c9affd..aa91d5fff1cf 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -5488,4 +5488,8 @@ const struct file_operations f2fs_file_operations = {
 	.fadvise	= f2fs_file_fadvise,
 	.fop_flags	= FOP_BUFFER_RASYNC,
 	.setlease	= generic_setlease,
+#ifdef CONFIG_SWAP
+	.swap_activate  = f2fs_swap_activate,
+	.swap_deactivate = f2fs_swap_deactivate,
+#endif
 };
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index f778b2c6c922..cf354fdfb7c3 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -100,10 +100,10 @@ static int iomap_swapfile_iter(struct iomap_iter *iter,
  * Iterate a swap file's iomaps to construct physical extents that can be
  * passed to the swapfile subsystem.
  */
-int iomap_swapfile_activate(struct swap_info_struct *sis,
-		struct file *swap_file, const struct iomap_ops *ops)
+int iomap_swap_activate(struct file *file, struct swap_info_struct *sis,
+		const struct iomap_ops *ops)
 {
-	struct inode *inode = swap_file->f_mapping->host;
+	struct inode *inode = file->f_mapping->host;
 	struct iomap_iter iter = {
 		.inode	= inode,
 		.pos	= 0,
@@ -112,7 +112,7 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 	};
 	struct iomap_swapfile_info isi = {
 		.sis = sis,
-		.file = swap_file,
+		.file = file,
 	};
 	int ret;
 
@@ -120,7 +120,7 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 	 * Persist all file mapping metadata so that we won't have any
 	 * IOMAP_F_DIRTY iomaps.
 	 */
-	ret = vfs_fsync(swap_file, 1);
+	ret = vfs_fsync(file, 1);
 	if (ret)
 		return ret;
 
@@ -137,4 +137,4 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
+EXPORT_SYMBOL_GPL(iomap_swap_activate);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 48d89716193a..e92a4c8f8f77 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -164,6 +164,7 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
 		return ret;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfs_swap_rw);
 
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 74b401aa2b3a..2bc55d9d71e1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -567,7 +567,7 @@ static int nfs_launder_folio(struct folio *folio)
 	return ret;
 }
 
-static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file)
+int nfs_swap_activate(struct file *file, struct swap_info_struct *sis)
 {
 	unsigned long blocks;
 	long long isize;
@@ -600,8 +600,9 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file)
 	sis->flags |= SWP_FS_OPS;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfs_swap_activate);
 
-static void nfs_swap_deactivate(struct file *file)
+void nfs_swap_deactivate(struct file *file)
 {
 	struct inode *inode = file_inode(file);
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -611,6 +612,7 @@ static void nfs_swap_deactivate(struct file *file)
 	if (cl->rpc_ops->disable_swap)
 		cl->rpc_ops->disable_swap(file_inode(file));
 }
+EXPORT_SYMBOL_GPL(nfs_swap_deactivate);
 
 const struct address_space_operations nfs_file_aops = {
 	.read_folio = nfs_read_folio,
@@ -625,9 +627,6 @@ const struct address_space_operations nfs_file_aops = {
 	.launder_folio = nfs_launder_folio,
 	.is_dirty_writeback = nfs_check_dirty_writeback,
 	.error_remove_folio = generic_error_remove_folio,
-	.swap_activate = nfs_swap_activate,
-	.swap_deactivate = nfs_swap_deactivate,
-	.swap_rw = nfs_swap_rw,
 };
 
 /*
@@ -960,6 +959,9 @@ const struct file_operations nfs_file_operations = {
 	.splice_read	= nfs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.check_flags	= nfs_check_flags,
+	.swap_activate	= nfs_swap_activate,
+	.swap_deactivate = nfs_swap_deactivate,
+	.swap_rw	= nfs_swap_rw,
 	.fop_flags	= FOP_DONTCACHE,
 };
 EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index be40e126c539..eb1a8dbab55a 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -455,5 +455,8 @@ const struct file_operations nfs4_file_operations = {
 #else
 	.llseek		= nfs_file_llseek,
 #endif
+	.swap_activate	= nfs_swap_activate,
+	.swap_deactivate = nfs_swap_deactivate,
+	.swap_rw	= nfs_swap_rw,
 	.fop_flags	= FOP_DONTCACHE,
 };
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 4b7d019bc6ed..a94f5f675790 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -270,12 +270,6 @@ static int ntfs_writepages(struct address_space *mapping,
 	return iomap_writepages(&wpc);
 }
 
-static int ntfs_swap_activate(struct swap_info_struct *sis,
-		struct file *swap_file)
-{
-	return iomap_swapfile_activate(sis, swap_file, &ntfs_read_iomap_ops);
-}
-
 const struct address_space_operations ntfs_aops = {
 	.read_folio		= ntfs_read_folio,
 	.readahead		= ntfs_readahead,
@@ -287,7 +281,6 @@ const struct address_space_operations ntfs_aops = {
 	.error_remove_folio	= generic_error_remove_folio,
 	.release_folio		= iomap_release_folio,
 	.invalidate_folio	= iomap_invalidate_folio,
-	.swap_activate          = ntfs_swap_activate,
 };
 
 const struct address_space_operations ntfs_mft_aops = {
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e8bea22b81a7..0dcf8479362a 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1114,6 +1114,11 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t offset, loff_t le
 	return err;
 }
 
+static int ntfs_swap_activate(struct file *file, struct swap_info_struct *sis)
+{
+	return iomap_swap_activate(file, sis, &ntfs_read_iomap_ops);
+}
+
 const struct file_operations ntfs_file_ops = {
 	.llseek		= ntfs_file_llseek,
 	.read_iter	= ntfs_file_read_iter,
@@ -1130,6 +1135,7 @@ const struct file_operations ntfs_file_ops = {
 #endif
 	.fallocate	= ntfs_fallocate,
 	.setlease	= generic_setlease,
+	.swap_activate	= ntfs_swap_activate,
 };
 
 const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 9f76b0347fa9..f0d8a3a46074 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1577,6 +1577,9 @@ const struct file_operations cifs_file_ops = {
 	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
+	.swap_activate	= cifs_swap_activate,
+	.swap_deactivate = cifs_swap_deactivate,
+	.swap_rw = cifs_swap_rw,
 };
 
 const struct file_operations cifs_file_strict_ops = {
@@ -1597,6 +1600,9 @@ const struct file_operations cifs_file_strict_ops = {
 	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
+	.swap_activate	= cifs_swap_activate,
+	.swap_deactivate = cifs_swap_deactivate,
+	.swap_rw = cifs_swap_rw,
 };
 
 const struct file_operations cifs_file_direct_ops = {
@@ -1617,6 +1623,9 @@ const struct file_operations cifs_file_direct_ops = {
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
+	.swap_activate	= cifs_swap_activate,
+	.swap_deactivate = cifs_swap_deactivate,
+	.swap_rw = cifs_swap_rw,
 };
 
 const struct file_operations cifs_file_nobrl_ops = {
@@ -1635,6 +1644,9 @@ const struct file_operations cifs_file_nobrl_ops = {
 	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
+	.swap_activate	= cifs_swap_activate,
+	.swap_deactivate = cifs_swap_deactivate,
+	.swap_rw = cifs_swap_rw,
 };
 
 const struct file_operations cifs_file_strict_nobrl_ops = {
@@ -1653,6 +1665,9 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
+	.swap_activate	= cifs_swap_activate,
+	.swap_deactivate = cifs_swap_deactivate,
+	.swap_rw = cifs_swap_rw,
 };
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
@@ -1671,6 +1686,9 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
+	.swap_activate	= cifs_swap_activate,
+	.swap_deactivate = cifs_swap_deactivate,
+	.swap_rw = cifs_swap_rw,
 };
 
 const struct file_operations cifs_dir_ops = {
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index c455b15f2778..1e5b9fce84f9 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -115,6 +115,9 @@ int cifs_file_mmap_prepare(struct vm_area_desc *desc);
 int cifs_file_strict_mmap_prepare(struct vm_area_desc *desc);
 extern const struct file_operations cifs_dir_ops;
 int cifs_readdir(struct file *file, struct dir_context *ctx);
+int cifs_swap_activate(struct file *swap_file, struct swap_info_struct *sis);
+void cifs_swap_deactivate(struct file *file);
+int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter);
 
 /* Functions related to dir entries */
 extern const struct dentry_operations cifs_dentry_ops;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 11d4655ef490..84459f87907e 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -3286,8 +3286,7 @@ void cifs_oplock_break(struct work_struct *work)
 	cifs_done_oplock_break(cinode);
 }
 
-static int cifs_swap_activate(struct swap_info_struct *sis,
-			      struct file *swap_file)
+int cifs_swap_activate(struct file *swap_file, struct swap_info_struct *sis)
 {
 	struct cifsFileInfo *cfile = swap_file->private_data;
 	struct inode *inode = swap_file->f_mapping->host;
@@ -3296,7 +3295,7 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
 
 	cifs_dbg(FYI, "swap activate\n");
 
-	if (!swap_file->f_mapping->a_ops->swap_rw)
+	if (!swap_file->f_op->swap_rw)
 		/* Cannot support swap */
 		return -EINVAL;
 
@@ -3331,7 +3330,7 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
 	return add_swap_extent(sis, sis->max, 0);
 }
 
-static void cifs_swap_deactivate(struct file *file)
+void cifs_swap_deactivate(struct file *file)
 {
 	struct cifsFileInfo *cfile = file->private_data;
 
@@ -3352,7 +3351,7 @@ static void cifs_swap_deactivate(struct file *file)
  *
  * Perform IO to the swap-file.  This is much like direct IO.
  */
-static int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
+int cifs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
 {
 	ssize_t ret;
 
@@ -3378,9 +3377,6 @@ const struct address_space_operations cifs_addr_ops = {
 	 * TODO: investigate and if useful we could add an is_dirty_writeback
 	 * helper if needed
 	 */
-	.swap_activate	= cifs_swap_activate,
-	.swap_deactivate = cifs_swap_deactivate,
-	.swap_rw = cifs_swap_rw,
 };
 
 /*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1e8662e0e7cd..7488fc6a7b78 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -796,50 +796,6 @@ xfs_vm_readahead(
 	iomap_readahead(&xfs_read_iomap_ops, &ctx, NULL);
 }
 
-static int
-xfs_vm_swap_activate(
-	struct swap_info_struct		*sis,
-	struct file			*swap_file)
-{
-	struct xfs_inode		*ip = XFS_I(file_inode(swap_file));
-
-	if (xfs_is_zoned_inode(ip))
-		return -EINVAL;
-
-	/*
-	 * Swap file activation can race against concurrent shared extent
-	 * removal in files that have been cloned.  If this happens,
-	 * iomap_swapfile_iter() can fail because it encountered a shared
-	 * extent even though an operation is in progress to remove those
-	 * shared extents.
-	 *
-	 * This race becomes problematic when we defer extent removal
-	 * operations beyond the end of a syscall (i.e. use async background
-	 * processing algorithms).  Users think the extents are no longer
-	 * shared, but iomap_swapfile_iter() still sees them as shared
-	 * because the refcountbt entries for the extents being removed have
-	 * not yet been updated.  Hence the swapon call fails unexpectedly.
-	 *
-	 * The race condition is currently most obvious from the unlink()
-	 * operation as extent removal is deferred until after the last
-	 * reference to the inode goes away.  We then process the extent
-	 * removal asynchronously, hence triggers the "syscall completed but
-	 * work not done" condition mentioned above.  To close this race
-	 * window, we need to flush any pending inodegc operations to ensure
-	 * they have updated the refcountbt records before we try to map the
-	 * swapfile.
-	 */
-	xfs_inodegc_flush(ip->i_mount);
-
-	/*
-	 * Direct the swap code to the correct block device when this file
-	 * sits on the RT device.
-	 */
-	sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
-
-	return iomap_swapfile_activate(sis, swap_file, &xfs_read_iomap_ops);
-}
-
 const struct address_space_operations xfs_address_space_operations = {
 	.read_folio		= xfs_vm_read_folio,
 	.readahead		= xfs_vm_readahead,
@@ -851,11 +807,9 @@ const struct address_space_operations xfs_address_space_operations = {
 	.migrate_folio		= filemap_migrate_folio,
 	.is_partially_uptodate  = iomap_is_partially_uptodate,
 	.error_remove_folio	= generic_error_remove_folio,
-	.swap_activate		= xfs_vm_swap_activate,
 };
 
 const struct address_space_operations xfs_dax_aops = {
 	.writepages		= xfs_dax_writepages,
 	.dirty_folio		= noop_dirty_folio,
-	.swap_activate		= xfs_vm_swap_activate,
 };
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 845a97c9b063..41f7e19bd31f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -2081,6 +2081,50 @@ xfs_file_mmap_prepare(
 	return 0;
 }
 
+static int
+xfs_file_swap_activate(
+	struct file			*file,
+	struct swap_info_struct		*sis)
+{
+	struct xfs_inode		*ip = XFS_I(file_inode(file));
+
+	if (xfs_is_zoned_inode(ip))
+		return -EINVAL;
+
+	/*
+	 * Swap file activation can race against concurrent shared extent
+	 * removal in files that have been cloned.  If this happens,
+	 * iomap_swapfile_iter() can fail because it encountered a shared
+	 * extent even though an operation is in progress to remove those
+	 * shared extents.
+	 *
+	 * This race becomes problematic when we defer extent removal
+	 * operations beyond the end of a syscall (i.e. use async background
+	 * processing algorithms).  Users think the extents are no longer
+	 * shared, but iomap_swapfile_iter() still sees them as shared
+	 * because the refcountbt entries for the extents being removed have
+	 * not yet been updated.  Hence the swapon call fails unexpectedly.
+	 *
+	 * The race condition is currently most obvious from the unlink()
+	 * operation as extent removal is deferred until after the last
+	 * reference to the inode goes away.  We then process the extent
+	 * removal asynchronously, hence triggers the "syscall completed but
+	 * work not done" condition mentioned above.  To close this race
+	 * window, we need to flush any pending inodegc operations to ensure
+	 * they have updated the refcountbt records before we try to map the
+	 * swapfile.
+	 */
+	xfs_inodegc_flush(ip->i_mount);
+
+	/*
+	 * Direct the swap code to the correct block device when this file
+	 * sits on the RT device.
+	 */
+	sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
+
+	return iomap_swap_activate(file, sis, &xfs_read_iomap_ops);
+}
+
 const struct file_operations xfs_file_operations = {
 	.llseek		= xfs_file_llseek,
 	.read_iter	= xfs_file_read_iter,
@@ -2104,6 +2148,7 @@ const struct file_operations xfs_file_operations = {
 			  FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE |
 			  FOP_DONTCACHE,
 	.setlease	= generic_setlease,
+	.swap_activate	= xfs_file_swap_activate,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 214e4bf8e30a..2c817917a13d 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -167,20 +167,6 @@ static int zonefs_writepages(struct address_space *mapping,
 	return iomap_writepages(&wpc);
 }
 
-static int zonefs_swap_activate(struct swap_info_struct *sis,
-				struct file *swap_file)
-{
-	struct inode *inode = file_inode(swap_file);
-
-	if (zonefs_inode_is_seq(inode)) {
-		zonefs_err(inode->i_sb,
-			   "swap file: not a conventional zone file\n");
-		return -EINVAL;
-	}
-
-	return iomap_swapfile_activate(sis, swap_file, &zonefs_read_iomap_ops);
-}
-
 const struct address_space_operations zonefs_file_aops = {
 	.read_folio		= zonefs_read_folio,
 	.readahead		= zonefs_readahead,
@@ -191,7 +177,6 @@ const struct address_space_operations zonefs_file_aops = {
 	.migrate_folio		= filemap_migrate_folio,
 	.is_partially_uptodate	= iomap_is_partially_uptodate,
 	.error_remove_folio	= generic_error_remove_folio,
-	.swap_activate		= zonefs_swap_activate,
 };
 
 int zonefs_file_truncate(struct inode *inode, loff_t isize)
@@ -858,6 +843,19 @@ static int zonefs_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int zonefs_swap_activate(struct file *file, struct swap_info_struct *sis)
+{
+	struct inode *inode = file_inode(file);
+
+	if (zonefs_inode_is_seq(inode)) {
+		zonefs_err(inode->i_sb,
+			   "swap file: not a conventional zone file\n");
+		return -EINVAL;
+	}
+
+	return iomap_swap_activate(file, sis, &zonefs_read_iomap_ops);
+}
+
 const struct file_operations zonefs_file_operations = {
 	.open		= zonefs_file_open,
 	.release	= zonefs_file_release,
@@ -869,4 +867,5 @@ const struct file_operations zonefs_file_operations = {
 	.splice_read	= zonefs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.iopoll		= iocb_bio_iopoll,
+	.swap_activate	= zonefs_swap_activate,
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b8b6f7a38f4d..7564cef5405d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -433,11 +433,6 @@ struct address_space_operations {
 			size_t count);
 	void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb);
 	int (*error_remove_folio)(struct address_space *, struct folio *);
-
-	/* swapfile support */
-	int (*swap_activate)(struct swap_info_struct *sis, struct file *file);
-	void (*swap_deactivate)(struct file *file);
-	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 };
 
 extern const struct address_space_operations empty_aops;
@@ -1966,6 +1961,11 @@ struct file_operations {
 	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
 				unsigned int poll_flags);
 	int (*mmap_prepare)(struct vm_area_desc *);
+
+	/* swapfile support */
+	int (*swap_activate)(struct file *file, struct swap_info_struct *sis);
+	void (*swap_deactivate)(struct file *file);
+	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 } __randomize_layout;
 
 /* Supports async buffered reads */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index d82126e3d086..3fd582d375b6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -603,10 +603,10 @@ void iomap_dio_bio_end_io(struct bio *bio);
 struct file;
 struct swap_info_struct;
 
-int iomap_swapfile_activate(struct swap_info_struct *sis,
-		struct file *swap_file, const struct iomap_ops *ops);
+int iomap_swap_activate(struct file *file, struct swap_info_struct *sis,
+		const struct iomap_ops *ops);
 #else
-# define iomap_swapfile_activate(sis, swapfile, ops)	(-EIO)
+# define iomap_swap_activate(file, sis, ops)	(-EIO)
 #endif /* CONFIG_SWAP */
 
 extern struct bio_set iomap_ioend_bioset;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4623262da3c0..9746212a085e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -538,6 +538,9 @@ extern __be32 root_nfs_parse_addr(char *name); /*__init*/
 /*
  * linux/fs/nfs/file.c
  */
+int nfs_swap_activate(struct file *file, struct swap_info_struct *sis);
+void nfs_swap_deactivate(struct file *file);
+
 extern const struct file_operations nfs_file_operations;
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern const struct file_operations nfs4_file_operations;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b8dfe2c6bc98..657779485ae4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -405,7 +405,7 @@ extern void __meminit kswapd_stop(int nid);
 
 int add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
 		sector_t start_block);
-int generic_swapfile_activate(struct swap_info_struct *, struct file *);
+int generic_swap_activate(struct file *swap_file, struct swap_info_struct *sis);
 
 static inline unsigned long total_swapcache_pages(void)
 {
diff --git a/mm/page_io.c b/mm/page_io.c
index f30f36ec1ed0..3e1c12649448 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -75,8 +75,7 @@ static void end_swap_bio_read(struct bio *bio)
 	bio_put(bio);
 }
 
-int generic_swapfile_activate(struct swap_info_struct *sis,
-				struct file *swap_file)
+int generic_swap_activate(struct file *swap_file, struct swap_info_struct *sis)
 {
 	struct address_space *mapping = swap_file->f_mapping;
 	struct inode *inode = mapping->host;
@@ -451,11 +450,10 @@ void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug)
 void swap_write_unplug(struct swap_iocb *sio)
 {
 	struct iov_iter from;
-	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
 	int ret;
 
 	iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len);
-	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+	ret = sio->iocb.ki_filp->f_op->swap_rw(&sio->iocb, &from);
 	if (ret != -EIOCBQUEUED)
 		sio_write_complete(&sio->iocb, ret);
 }
@@ -640,11 +638,10 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 void __swap_read_unplug(struct swap_iocb *sio)
 {
 	struct iov_iter from;
-	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
 	int ret;
 
 	iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len);
-	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+	ret = sio->iocb.ki_filp->f_op->swap_rw(&sio->iocb, &from);
 	if (ret != -EIOCBQUEUED)
 		sio_read_complete(&sio->iocb, ret);
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 158620fd2978..a183c9c95695 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2692,11 +2692,9 @@ static void destroy_swap_extents(struct swap_info_struct *sis,
 	}
 
 	if (sis->flags & SWP_ACTIVATED) {
-		struct address_space *mapping = swap_file->f_mapping;
-
 		sis->flags &= ~SWP_ACTIVATED;
-		if (mapping->a_ops->swap_deactivate)
-			mapping->a_ops->swap_deactivate(swap_file);
+		if (swap_file->f_op->swap_deactivate)
+			swap_file->f_op->swap_deactivate(swap_file);
 	}
 }
 
@@ -2790,8 +2788,8 @@ static int setup_swap_extents(struct swap_info_struct *sis,
 	if (S_ISBLK(inode->i_mode))
 		return add_swap_extent(sis, sis->max, 0);
 
-	if (mapping->a_ops->swap_activate) {
-		ret = mapping->a_ops->swap_activate(sis, swap_file);
+	if (swap_file->f_op->swap_activate) {
+		ret = swap_file->f_op->swap_activate(swap_file, sis);
 		if (ret < 0)
 			return ret;
 		sis->flags |= SWP_ACTIVATED;
@@ -2803,7 +2801,7 @@ static int setup_swap_extents(struct swap_info_struct *sis,
 		return ret;
 	}
 
-	return generic_swapfile_activate(sis, swap_file);
+	return generic_swap_activate(swap_file, sis);
 }
 
 static void _enable_swap_info(struct swap_info_struct *si)
-- 
2.53.0


^ permalink raw reply related

* [PATCH 02/12] swap: move boilerplate code into the core swap code
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J. Wong, Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Make the core swap code calculate sis->pages, nr_extents and the span,
re-set sis->max based on it and don't require passing the current offset
into the swap file to add_swap_extent as all that can trivially be
calculated internally.  Also truncate the spans based on the available
information.

All this removes a lot of boilerplate code in the callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/filesystems/locking.rst |   2 +-
 Documentation/filesystems/vfs.rst     |   2 +-
 fs/btrfs/inode.c                      |  58 ++-----------
 fs/ext4/inode.c                       |   5 +-
 fs/f2fs/data.c                        |  38 ++-------
 fs/iomap/swapfile.c                   |  58 +------------
 fs/nfs/file.c                         |   9 +-
 fs/ntfs/aops.c                        |   5 +-
 fs/smb/client/file.c                  |   5 +-
 fs/xfs/xfs_aops.c                     |   6 +-
 fs/zonefs/file.c                      |   5 +-
 include/linux/fs.h                    |   3 +-
 include/linux/iomap.h                 |   5 +-
 include/linux/swap.h                  |  11 ++-
 mm/page_io.c                          |  39 ++-------
 mm/swapfile.c                         | 116 ++++++++++++++++----------
 16 files changed, 121 insertions(+), 246 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 8421ea21bd35..f3658204d070 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -264,7 +264,7 @@ prototypes::
 	int (*launder_folio)(struct folio *);
 	bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
 	int (*error_remove_folio)(struct address_space *, struct folio *);
-	int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
+	int (*swap_activate)(struct swap_info_struct *sis, struct file *f)
 	int (*swap_deactivate)(struct file *);
 	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 7c753148af88..4092b2149a5d 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -774,7 +774,7 @@ cache in your filesystem.  The following members are defined:
 					       size_t count);
 		void (*is_dirty_writeback)(struct folio *, bool *, bool *);
 		int (*error_remove_folio)(struct mapping *mapping, struct folio *);
-		int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
+		int (*swap_activate)(struct swap_info_struct *sis, struct file *f);
 		int (*swap_deactivate)(struct file *);
 		int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 	};
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 906d5c21ebc4..198d87e6f19a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10204,51 +10204,17 @@ struct btrfs_swap_info {
 	u64 start;
 	u64 block_start;
 	u64 block_len;
-	u64 lowest_ppage;
-	u64 highest_ppage;
-	unsigned long nr_pages;
-	int nr_extents;
 };
 
 static int btrfs_add_swap_extent(struct swap_info_struct *sis,
 				 struct btrfs_swap_info *bsi)
 {
-	unsigned long nr_pages;
-	unsigned long max_pages;
-	u64 first_ppage, first_ppage_reported, next_ppage;
-	int ret;
-
-	/*
-	 * Our swapfile may have had its size extended after the swap header was
-	 * written. In that case activating the swapfile should not go beyond
-	 * the max size set in the swap header.
-	 */
-	if (bsi->nr_pages >= sis->max)
-		return 0;
+	u64 first_ppage, next_ppage;
 
-	max_pages = sis->max - bsi->nr_pages;
 	first_ppage = PAGE_ALIGN(bsi->block_start) >> PAGE_SHIFT;
 	next_ppage = PAGE_ALIGN_DOWN(bsi->block_start + bsi->block_len) >> PAGE_SHIFT;
 
-	if (first_ppage >= next_ppage)
-		return 0;
-	nr_pages = next_ppage - first_ppage;
-	nr_pages = min(nr_pages, max_pages);
-
-	first_ppage_reported = first_ppage;
-	if (bsi->start == 0)
-		first_ppage_reported++;
-	if (bsi->lowest_ppage > first_ppage_reported)
-		bsi->lowest_ppage = first_ppage_reported;
-	if (bsi->highest_ppage < (next_ppage - 1))
-		bsi->highest_ppage = next_ppage - 1;
-
-	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
-	if (ret < 0)
-		return ret;
-	bsi->nr_extents += ret;
-	bsi->nr_pages += nr_pages;
-	return 0;
+	return add_swap_extent(sis, next_ppage - first_ppage, first_ppage);
 }
 
 static void btrfs_swap_deactivate(struct file *file)
@@ -10259,8 +10225,7 @@ static void btrfs_swap_deactivate(struct file *file)
 	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
 }
 
-static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
-			       sector_t *span)
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file)
 {
 	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -10269,9 +10234,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	struct extent_state *cached_state = NULL;
 	struct btrfs_chunk_map *map = NULL;
 	struct btrfs_device *device = NULL;
-	struct btrfs_swap_info bsi = {
-		.lowest_ppage = (sector_t)-1ULL,
-	};
+	struct btrfs_swap_info bsi = {};
 	struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
 	struct btrfs_path *path = NULL;
 	int ret = 0;
@@ -10570,23 +10533,16 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	up_write(&BTRFS_I(inode)->i_mmap_lock);
 	btrfs_free_backref_share_ctx(backref_ctx);
 	btrfs_free_path(path);
-	if (ret)
-		return ret;
-
-	if (device)
+	if (!ret && device)
 		sis->bdev = device->bdev;
-	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
-	sis->max = bsi.nr_pages;
-	sis->pages = bsi.nr_pages - 1;
-	return bsi.nr_extents;
+	return ret;
 }
 #else
 static void btrfs_swap_deactivate(struct file *file)
 {
 }
 
-static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
-			       sector_t *span)
+static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2c2d6ac7f3d..ca7bac4a8b4a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3940,10 +3940,9 @@ static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio)
 }
 
 static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
-				    struct file *file, sector_t *span)
+				    struct file *file)
 {
-	return iomap_swapfile_activate(sis, file, span,
-				       &ext4_iomap_report_ops);
+	return iomap_swapfile_activate(sis, file, &ext4_iomap_report_ops);
 }
 
 static const struct address_space_operations ext4_aops = {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8d4f1e75dee3..86fabacc67e6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4249,7 +4249,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 }
 
 static int check_swap_activate(struct swap_info_struct *sis,
-				struct file *swap_file, sector_t *span)
+				struct file *swap_file)
 {
 	struct address_space *mapping = swap_file->f_mapping;
 	struct inode *inode = mapping->host;
@@ -4257,9 +4257,6 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	block_t cur_lblock;
 	block_t last_lblock;
 	block_t pblock;
-	block_t lowest_pblock = -1;
-	block_t highest_pblock = 0;
-	int nr_extents = 0;
 	unsigned int nr_pblocks;
 	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
 	unsigned int not_aligned = 0;
@@ -4272,7 +4269,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	cur_lblock = 0;
 	last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode));
 
-	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
+	while (cur_lblock < last_lblock) {
 		struct f2fs_map_blocks map;
 		bool last_extent = false;
 retry:
@@ -4307,8 +4304,6 @@ static int check_swap_activate(struct swap_info_struct *sis,
 			not_aligned++;
 
 			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
-			if (cur_lblock + nr_pblocks > sis->max)
-				nr_pblocks -= blks_per_sec;
 
 			/* this extent is last one */
 			if (!nr_pblocks) {
@@ -4328,31 +4323,14 @@ static int check_swap_activate(struct swap_info_struct *sis,
 			goto retry;
 		}
 
-		if (cur_lblock + nr_pblocks >= sis->max)
-			nr_pblocks = sis->max - cur_lblock;
-
-		if (cur_lblock) {	/* exclude the header page */
-			if (pblock < lowest_pblock)
-				lowest_pblock = pblock;
-			if (pblock + nr_pblocks - 1 > highest_pblock)
-				highest_pblock = pblock + nr_pblocks - 1;
-		}
-
 		/*
 		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
 		 */
-		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
+		ret = add_swap_extent(sis, nr_pblocks, pblock);
 		if (ret < 0)
 			goto out;
-		nr_extents += ret;
 		cur_lblock += nr_pblocks;
 	}
-	ret = nr_extents;
-	*span = 1 + highest_pblock - lowest_pblock;
-	if (cur_lblock == 0)
-		cur_lblock = 1;	/* force Empty message */
-	sis->max = cur_lblock;
-	sis->pages = cur_lblock - 1;
 out:
 	if (not_aligned)
 		f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
@@ -4360,8 +4338,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	return ret;
 }
 
-static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
-				sector_t *span)
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file)
 {
 	struct inode *inode = file_inode(file);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -4391,14 +4368,14 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
 	f2fs_precache_extents(inode);
 
-	ret = check_swap_activate(sis, file, span);
+	ret = check_swap_activate(sis, file);
 	if (ret < 0)
 		return ret;
 
 	stat_inc_swapfile_inode(inode);
 	set_inode_flag(inode, FI_PIN_FILE);
 	f2fs_update_time(sbi, REQ_TIME);
-	return ret;
+	return 0;
 }
 
 static void f2fs_swap_deactivate(struct file *file)
@@ -4409,8 +4386,7 @@ static void f2fs_swap_deactivate(struct file *file)
 	clear_inode_flag(inode, FI_PIN_FILE);
 }
 #else
-static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
-				sector_t *span)
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index 0db77c449467..f778b2c6c922 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -11,10 +11,7 @@
 struct iomap_swapfile_info {
 	struct iomap iomap;		/* accumulated iomap */
 	struct swap_info_struct *sis;
-	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
-	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
 	unsigned long nr_pages;		/* number of pages collected */
-	int nr_extents;			/* extent count */
 	struct file *file;
 };
 
@@ -27,16 +24,8 @@ struct iomap_swapfile_info {
 static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
 {
 	struct iomap *iomap = &isi->iomap;
-	unsigned long nr_pages;
-	unsigned long max_pages;
 	uint64_t first_ppage;
-	uint64_t first_ppage_reported;
 	uint64_t next_ppage;
-	int error;
-
-	if (unlikely(isi->nr_pages >= isi->sis->max))
-		return 0;
-	max_pages = isi->sis->max - isi->nr_pages;
 
 	/*
 	 * Round the start up and the end down so that the physical
@@ -45,33 +34,7 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
 	first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
 	next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
 			PAGE_SHIFT;
-
-	/* Skip too-short physical extents. */
-	if (first_ppage >= next_ppage)
-		return 0;
-	nr_pages = next_ppage - first_ppage;
-	nr_pages = min(nr_pages, max_pages);
-
-	/*
-	 * Calculate how much swap space we're adding; the first page contains
-	 * the swap header and doesn't count.  The mm still wants that first
-	 * page fed to add_swap_extent, however.
-	 */
-	first_ppage_reported = first_ppage;
-	if (iomap->offset == 0)
-		first_ppage_reported++;
-	if (isi->lowest_ppage > first_ppage_reported)
-		isi->lowest_ppage = first_ppage_reported;
-	if (isi->highest_ppage < (next_ppage - 1))
-		isi->highest_ppage = next_ppage - 1;
-
-	/* Add extent, set up for the next call. */
-	error = add_swap_extent(isi->sis, isi->nr_pages, nr_pages, first_ppage);
-	if (error < 0)
-		return error;
-	isi->nr_extents += error;
-	isi->nr_pages += nr_pages;
-	return 0;
+	return add_swap_extent(isi->sis, next_ppage - first_ppage, first_ppage);
 }
 
 static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str)
@@ -138,8 +101,7 @@ static int iomap_swapfile_iter(struct iomap_iter *iter,
  * passed to the swapfile subsystem.
  */
 int iomap_swapfile_activate(struct swap_info_struct *sis,
-		struct file *swap_file, sector_t *pagespan,
-		const struct iomap_ops *ops)
+		struct file *swap_file, const struct iomap_ops *ops)
 {
 	struct inode *inode = swap_file->f_mapping->host;
 	struct iomap_iter iter = {
@@ -150,7 +112,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 	};
 	struct iomap_swapfile_info isi = {
 		.sis = sis,
-		.lowest_ppage = (sector_t)-1ULL,
 		.file = swap_file,
 	};
 	int ret;
@@ -174,19 +135,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
 			return ret;
 	}
 
-	/*
-	 * If this swapfile doesn't contain even a single page-aligned
-	 * contiguous range of blocks, reject this useless swapfile to
-	 * prevent confusion later on.
-	 */
-	if (isi.nr_pages == 0) {
-		pr_warn("swapon: Cannot find a single usable page in file.\n");
-		return -EINVAL;
-	}
-
-	*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
-	sis->max = isi.nr_pages;
-	sis->pages = isi.nr_pages - 1;
-	return isi.nr_extents;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 25048a3c2364..74b401aa2b3a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -567,8 +567,7 @@ static int nfs_launder_folio(struct folio *folio)
 	return ret;
 }
 
-static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
-						sector_t *span)
+static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file)
 {
 	unsigned long blocks;
 	long long isize;
@@ -589,19 +588,17 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	ret = rpc_clnt_swap_activate(clnt);
 	if (ret)
 		return ret;
-	ret = add_swap_extent(sis, 0, sis->max, 0);
+	ret = add_swap_extent(sis, sis->max, 0);
 	if (ret < 0) {
 		rpc_clnt_swap_deactivate(clnt);
 		return ret;
 	}
 
-	*span = sis->pages;
-
 	if (cl->rpc_ops->enable_swap)
 		cl->rpc_ops->enable_swap(inode);
 
 	sis->flags |= SWP_FS_OPS;
-	return ret;
+	return 0;
 }
 
 static void nfs_swap_deactivate(struct file *file)
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 1fbf832ad165..4b7d019bc6ed 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -271,10 +271,9 @@ static int ntfs_writepages(struct address_space *mapping,
 }
 
 static int ntfs_swap_activate(struct swap_info_struct *sis,
-		struct file *swap_file, sector_t *span)
+		struct file *swap_file)
 {
-	return iomap_swapfile_activate(sis, swap_file, span,
-			&ntfs_read_iomap_ops);
+	return iomap_swapfile_activate(sis, swap_file, &ntfs_read_iomap_ops);
 }
 
 const struct address_space_operations ntfs_aops = {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 664a2c223089..11d4655ef490 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -3287,7 +3287,7 @@ void cifs_oplock_break(struct work_struct *work)
 }
 
 static int cifs_swap_activate(struct swap_info_struct *sis,
-			      struct file *swap_file, sector_t *span)
+			      struct file *swap_file)
 {
 	struct cifsFileInfo *cfile = swap_file->private_data;
 	struct inode *inode = swap_file->f_mapping->host;
@@ -3308,7 +3308,6 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
 		pr_warn("swap activate: swapfile has holes\n");
 		return -EINVAL;
 	}
-	*span = sis->pages;
 
 	pr_warn_once("Swap support over SMB3 is experimental\n");
 
@@ -3329,7 +3328,7 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
 	 */
 
 	sis->flags |= SWP_FS_OPS;
-	return add_swap_extent(sis, 0, sis->max, 0);
+	return add_swap_extent(sis, sis->max, 0);
 }
 
 static void cifs_swap_deactivate(struct file *file)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f279055fcea0..1e8662e0e7cd 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -799,8 +799,7 @@ xfs_vm_readahead(
 static int
 xfs_vm_swap_activate(
 	struct swap_info_struct		*sis,
-	struct file			*swap_file,
-	sector_t			*span)
+	struct file			*swap_file)
 {
 	struct xfs_inode		*ip = XFS_I(file_inode(swap_file));
 
@@ -838,8 +837,7 @@ xfs_vm_swap_activate(
 	 */
 	sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
 
-	return iomap_swapfile_activate(sis, swap_file, span,
-			&xfs_read_iomap_ops);
+	return iomap_swapfile_activate(sis, swap_file, &xfs_read_iomap_ops);
 }
 
 const struct address_space_operations xfs_address_space_operations = {
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 5ada33f70bb4..214e4bf8e30a 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -168,7 +168,7 @@ static int zonefs_writepages(struct address_space *mapping,
 }
 
 static int zonefs_swap_activate(struct swap_info_struct *sis,
-				struct file *swap_file, sector_t *span)
+				struct file *swap_file)
 {
 	struct inode *inode = file_inode(swap_file);
 
@@ -178,8 +178,7 @@ static int zonefs_swap_activate(struct swap_info_struct *sis,
 		return -EINVAL;
 	}
 
-	return iomap_swapfile_activate(sis, swap_file, span,
-				       &zonefs_read_iomap_ops);
+	return iomap_swapfile_activate(sis, swap_file, &zonefs_read_iomap_ops);
 }
 
 const struct address_space_operations zonefs_file_aops = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 11559c513dfb..b8b6f7a38f4d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -435,8 +435,7 @@ struct address_space_operations {
 	int (*error_remove_folio)(struct address_space *, struct folio *);
 
 	/* swapfile support */
-	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
-				sector_t *span);
+	int (*swap_activate)(struct swap_info_struct *sis, struct file *file);
 	void (*swap_deactivate)(struct file *file);
 	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 };
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 2c5685adf3a9..d82126e3d086 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -604,10 +604,9 @@ struct file;
 struct swap_info_struct;
 
 int iomap_swapfile_activate(struct swap_info_struct *sis,
-		struct file *swap_file, sector_t *pagespan,
-		const struct iomap_ops *ops);
+		struct file *swap_file, const struct iomap_ops *ops);
 #else
-# define iomap_swapfile_activate(sis, swapfile, pagespan, ops)	(-EIO)
+# define iomap_swapfile_activate(sis, swapfile, ops)	(-EIO)
 #endif /* CONFIG_SWAP */
 
 extern struct bio_set iomap_ioend_bioset;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7a09df6977a5..b8dfe2c6bc98 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -403,10 +403,9 @@ extern void __meminit kswapd_stop(int nid);
 
 #ifdef CONFIG_SWAP
 
-int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
-		unsigned long nr_pages, sector_t start_block);
-int generic_swapfile_activate(struct swap_info_struct *, struct file *,
-		sector_t *);
+int add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
+		sector_t start_block);
+int generic_swapfile_activate(struct swap_info_struct *, struct file *);
 
 static inline unsigned long total_swapcache_pages(void)
 {
@@ -528,8 +527,8 @@ static inline bool folio_free_swap(struct folio *folio)
 }
 
 static inline int add_swap_extent(struct swap_info_struct *sis,
-				  unsigned long start_page,
-				  unsigned long nr_pages, sector_t start_block)
+		unsigned long start_page, unsigned long nr_pages,
+		sector_t start_block)
 {
 	return -EINVAL;
 }
diff --git a/mm/page_io.c b/mm/page_io.c
index 70cea9e24d2f..f30f36ec1ed0 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -76,19 +76,14 @@ static void end_swap_bio_read(struct bio *bio)
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
-				struct file *swap_file,
-				sector_t *span)
+				struct file *swap_file)
 {
 	struct address_space *mapping = swap_file->f_mapping;
 	struct inode *inode = mapping->host;
 	unsigned blocks_per_page;
-	unsigned long page_no;
 	unsigned blkbits;
 	sector_t probe_block;
 	sector_t last_block;
-	sector_t lowest_block = -1;
-	sector_t highest_block = 0;
-	int nr_extents = 0;
 	int ret;
 
 	blkbits = inode->i_blkbits;
@@ -99,10 +94,8 @@ int generic_swapfile_activate(struct swap_info_struct *sis,
 	 * to be very smart.
 	 */
 	probe_block = 0;
-	page_no = 0;
 	last_block = i_size_read(inode) >> blkbits;
-	while ((probe_block + blocks_per_page) <= last_block &&
-			page_no < sis->max) {
+	while ((probe_block + blocks_per_page) <= last_block) {
 		unsigned block_in_page;
 		sector_t first_block;
 
@@ -137,38 +130,22 @@ int generic_swapfile_activate(struct swap_info_struct *sis,
 			}
 		}
 
-		first_block >>= (PAGE_SHIFT - blkbits);
-		if (page_no) {	/* exclude the header page */
-			if (first_block < lowest_block)
-				lowest_block = first_block;
-			if (first_block > highest_block)
-				highest_block = first_block;
-		}
-
 		/*
 		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
 		 */
-		ret = add_swap_extent(sis, page_no, 1, first_block);
+		ret = add_swap_extent(sis, 1,
+				first_block >> (PAGE_SHIFT - blkbits));
 		if (ret < 0)
-			goto out;
-		nr_extents += ret;
-		page_no++;
+			return ret;
 		probe_block += blocks_per_page;
 reprobe:
 		continue;
 	}
-	ret = nr_extents;
-	*span = 1 + highest_block - lowest_block;
-	if (page_no == 0)
-		page_no = 1;	/* force Empty message */
-	sis->max = page_no;
-	sis->pages = page_no - 1;
-out:
-	return ret;
+	return 0;
+
 bad_bmap:
 	pr_err("swapon: swapfile has holes\n");
-	ret = -EINVAL;
-	goto out;
+	return -EINVAL;
 }
 
 static bool is_folio_zero_filled(struct folio *folio)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f7ebd97e28a3..158620fd2978 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2704,15 +2704,21 @@ static void destroy_swap_extents(struct swap_info_struct *sis,
  * Add a block range (and the corresponding page range) into this swapdev's
  * extent tree.
  *
- * This function rather assumes that it is called in ascending page order.
+ * Note that start_block is in units of PAGE_SIZE and not actually in block
+ * layer sectors as the sector_t would suggest.
  */
 int
-add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
-		unsigned long nr_pages, sector_t start_block)
+add_swap_extent(struct swap_info_struct *sis, unsigned long nr_pages,
+		sector_t start_block)
 {
 	struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL;
 	struct swap_extent *se;
-	struct swap_extent *new_se;
+
+	if (!nr_pages)
+		return 0;
+	if (unlikely(sis->pages >= sis->max))
+		return 0;
+	nr_pages = min(nr_pages, sis->max - sis->pages);
 
 	/*
 	 * place the new node at the right most since the
@@ -2725,25 +2731,25 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
 
 	if (parent) {
 		se = rb_entry(parent, struct swap_extent, rb_node);
-		BUG_ON(se->start_page + se->nr_pages != start_page);
-		if (se->start_block + se->nr_pages == start_block) {
-			/* Merge it */
-			se->nr_pages += nr_pages;
-			return 0;
-		}
+		if (WARN_ON_ONCE(se->start_page + se->nr_pages != sis->pages))
+			return -EINVAL;
+		if (se->start_block + se->nr_pages == start_block)
+			goto add;
 	}
 
 	/* No merge, insert a new extent. */
-	new_se = kmalloc_obj(*se);
-	if (new_se == NULL)
+	se = kzalloc_obj(*se);
+	if (!se)
 		return -ENOMEM;
-	new_se->start_page = start_page;
-	new_se->nr_pages = nr_pages;
-	new_se->start_block = start_block;
-
-	rb_link_node(&new_se->rb_node, parent, link);
-	rb_insert_color(&new_se->rb_node, &sis->swap_extent_root);
-	return 1;
+	rb_link_node(&se->rb_node, parent, link);
+	rb_insert_color(&se->rb_node, &sis->swap_extent_root);
+
+	se->start_page = sis->pages;
+	se->start_block = start_block;
+add:
+	se->nr_pages += nr_pages;
+	sis->pages += nr_pages;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(add_swap_extent);
 
@@ -2775,20 +2781,17 @@ EXPORT_SYMBOL_GPL(add_swap_extent);
  * extents in the rbtree. - akpm.
  */
 static int setup_swap_extents(struct swap_info_struct *sis,
-			      struct file *swap_file, sector_t *span)
+			      struct file *swap_file)
 {
 	struct address_space *mapping = swap_file->f_mapping;
 	struct inode *inode = mapping->host;
 	int ret;
 
-	if (S_ISBLK(inode->i_mode)) {
-		ret = add_swap_extent(sis, 0, sis->max, 0);
-		*span = sis->pages;
-		return ret;
-	}
+	if (S_ISBLK(inode->i_mode))
+		return add_swap_extent(sis, sis->max, 0);
 
 	if (mapping->a_ops->swap_activate) {
-		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
+		ret = mapping->a_ops->swap_activate(sis, swap_file);
 		if (ret < 0)
 			return ret;
 		sis->flags |= SWP_ACTIVATED;
@@ -2800,7 +2803,7 @@ static int setup_swap_extents(struct swap_info_struct *sis,
 		return ret;
 	}
 
-	return generic_swapfile_activate(sis, swap_file, span);
+	return generic_swapfile_activate(sis, swap_file);
 }
 
 static void _enable_swap_info(struct swap_info_struct *si)
@@ -3428,6 +3431,40 @@ static int setup_swap_clusters_info(struct swap_info_struct *si,
 	return err;
 }
 
+static void swap_print_info(struct swap_info_struct *si, const char *name)
+{
+	unsigned int nr_extents = 0;
+	u64 lowest_ppage = (u64)-1;
+	u64 highest_ppage = 0;
+	struct swap_extent *se;
+
+	/*
+	 * Calculate how much swap space we're adding; the first page contains
+	 * the swap header and doesn't count.
+	 */
+	for (se = first_se(si); se; se = next_se(se)) {
+		u64 first_ppage = se->start_block;
+		u64 next_ppage = se->start_block + se->nr_pages;
+
+		if (se->start_page == 0)
+			first_ppage++;
+
+		if (lowest_ppage > first_ppage)
+			lowest_ppage = first_ppage;
+		if (highest_ppage < next_ppage - 1)
+			highest_ppage = next_ppage - 1;
+		nr_extents++;
+	}
+
+	pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",
+		K(si->pages), name, si->prio, nr_extents,
+		K(highest_ppage - lowest_ppage),
+		(si->flags & SWP_SOLIDSTATE) ? "SS" : "",
+		(si->flags & SWP_DISCARDABLE) ? "D" : "",
+		(si->flags & SWP_AREA_DISCARD) ? "s" : "",
+		(si->flags & SWP_PAGE_DISCARD) ? "c" : "");
+}
+
 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
 	struct swap_info_struct *si;
@@ -3437,8 +3474,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	int prio;
 	int error;
 	union swap_header *swap_header;
-	int nr_extents;
-	sector_t span;
 	struct folio *folio = NULL;
 	struct inode *inode = NULL;
 	bool inced_nr_rotate_swap = false;
@@ -3510,24 +3545,25 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	}
 	swap_header = kmap_local_folio(folio, 0);
 
+	si->pages = 0;
 	si->max = read_swap_header(si, swap_header, inode);
 	if (unlikely(!si->max)) {
 		error = -EINVAL;
 		goto bad_swap_unlock_inode;
 	}
 
-	si->pages = si->max - 1;
-	nr_extents = setup_swap_extents(si, swap_file, &span);
-	if (nr_extents < 0) {
-		error = nr_extents;
+	error = setup_swap_extents(si, swap_file);
+	if (error < 0)
 		goto bad_swap_unlock_inode;
-	}
-	if (si->pages != si->max - 1) {
-		pr_err("swap:%u != (max:%u - 1)\n", si->pages, si->max);
+	if (si->pages != si->max) {
+		pr_err("swap:%u != (max:%u)\n", si->pages, si->max);
 		error = -EINVAL;
 		goto bad_swap_unlock_inode;
 	}
 
+	/* Remove the first page containing the swap header. */
+	si->pages--;
+
 	/* Set up the swap cluster info */
 	error = setup_swap_clusters_info(si, swap_header);
 	if (error)
@@ -3624,13 +3660,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	/* Sets SWP_WRITEOK, resurrect the percpu ref, expose the swap device */
 	enable_swap_info(si);
 
-	pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",
-		K(si->pages), name->name, si->prio, nr_extents,
-		K((unsigned long long)span),
-		(si->flags & SWP_SOLIDSTATE) ? "SS" : "",
-		(si->flags & SWP_DISCARDABLE) ? "D" : "",
-		(si->flags & SWP_AREA_DISCARD) ? "s" : "",
-		(si->flags & SWP_PAGE_DISCARD) ? "c" : "");
+	swap_print_info(si, name->name);
 
 	mutex_unlock(&swapon_mutex);
 	atomic_inc(&proc_poll_event);
-- 
2.53.0


^ permalink raw reply related

* [PATCH 01/12] swap: remove the maxpages variable in sys_swapon
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs
In-Reply-To: <20260512053625.2950900-1-hch@lst.de>

Always use si->max, which is updated by setup_swap_extents, instead of
copying into and out of maxpages.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/swapfile.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9174f1eeffb0..f7ebd97e28a3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3350,10 +3350,9 @@ static unsigned long read_swap_header(struct swap_info_struct *si,
 }
 
 static int setup_swap_clusters_info(struct swap_info_struct *si,
-				    union swap_header *swap_header,
-				    unsigned long maxpages)
+				    union swap_header *swap_header)
 {
-	unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
+	unsigned long nr_clusters = DIV_ROUND_UP(si->max, SWAPFILE_CLUSTER);
 	struct swap_cluster_info *cluster_info;
 	int err = -ENOMEM;
 	unsigned long i;
@@ -3395,7 +3394,7 @@ static int setup_swap_clusters_info(struct swap_info_struct *si,
 		if (err)
 			goto err;
 	}
-	for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++) {
+	for (i = si->max; i < round_up(si->max, SWAPFILE_CLUSTER); i++) {
 		err = swap_cluster_setup_bad_slot(si, cluster_info, i, true);
 		if (err)
 			goto err;
@@ -3425,7 +3424,7 @@ static int setup_swap_clusters_info(struct swap_info_struct *si,
 	si->cluster_info = cluster_info;
 	return 0;
 err:
-	free_swap_cluster_info(cluster_info, maxpages);
+	free_swap_cluster_info(cluster_info, si->max);
 	return err;
 }
 
@@ -3440,7 +3439,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	union swap_header *swap_header;
 	int nr_extents;
 	sector_t span;
-	unsigned long maxpages;
 	struct folio *folio = NULL;
 	struct inode *inode = NULL;
 	bool inced_nr_rotate_swap = false;
@@ -3512,14 +3510,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	}
 	swap_header = kmap_local_folio(folio, 0);
 
-	maxpages = read_swap_header(si, swap_header, inode);
-	if (unlikely(!maxpages)) {
+	si->max = read_swap_header(si, swap_header, inode);
+	if (unlikely(!si->max)) {
 		error = -EINVAL;
 		goto bad_swap_unlock_inode;
 	}
 
-	si->max = maxpages;
-	si->pages = maxpages - 1;
+	si->pages = si->max - 1;
 	nr_extents = setup_swap_extents(si, swap_file, &span);
 	if (nr_extents < 0) {
 		error = nr_extents;
@@ -3531,14 +3528,12 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap_unlock_inode;
 	}
 
-	maxpages = si->max;
-
 	/* Set up the swap cluster info */
-	error = setup_swap_clusters_info(si, swap_header, maxpages);
+	error = setup_swap_clusters_info(si, swap_header);
 	if (error)
 		goto bad_swap_unlock_inode;
 
-	error = swap_cgroup_swapon(si->type, maxpages);
+	error = swap_cgroup_swapon(si->type, si->max);
 	if (error)
 		goto bad_swap_unlock_inode;
 
@@ -3546,7 +3541,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	 * Use kvmalloc_array instead of bitmap_zalloc as the allocation order might
 	 * be above MAX_PAGE_ORDER incase of a large swap file.
 	 */
-	si->zeromap = kvmalloc_array(BITS_TO_LONGS(maxpages), sizeof(long),
+	si->zeromap = kvmalloc_array(BITS_TO_LONGS(si->max), sizeof(long),
 				     GFP_KERNEL | __GFP_ZERO);
 	if (!si->zeromap) {
 		error = -ENOMEM;
@@ -3597,7 +3592,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		}
 	}
 
-	error = zswap_swapon(si->type, maxpages);
+	error = zswap_swapon(si->type, si->max);
 	if (error)
 		goto bad_swap_unlock_inode;
 
-- 
2.53.0


^ permalink raw reply related

* improve the swap_activate interface
From: Christoph Hellwig @ 2026-05-12  5:35 UTC (permalink / raw)
  To: Andrew Morton, Chris Li, Kairui Song
  Cc: Christian Brauner, Darrick J . Wong , Jens Axboe, David Sterba,
	Theodore Ts'o, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Namjae Jeon, Hyunchul Lee, Steve French,
	Paulo Alcantara, Carlos Maiolino, Damien Le Moal, Naohiro Aota,
	linux-xfs, linux-fsdevel, linux-doc, linux-mm, linux-block,
	linux-btrfs, linux-ext4, linux-f2fs-devel, linux-nfs, linux-cifs

Hi all,

Darrick recently posted iomap support for fuse-iomap, which was trivial
but a bit ugly, and that prompted me to look into how this could be done
in a cleaner way.  The result is this fairly big series, which reworks
how the MM code calls into the file system to activate swap files to
make it much cleaner and easier to use.

I've tested this with swap devices manually, and using the swap tests
in xfstests on btrfs, ext3, ext4, f2fs and xfs to exercise the different
implementations.  Out of those all passed, but f2fs actually notruns all
tests even in the baseline as it requires special preparation for
swapfiles which never got wired up in xfstests.

Diffstat:
 Documentation/filesystems/iomap/operations.rst |    3 
 Documentation/filesystems/locking.rst          |   35 +--
 Documentation/filesystems/vfs.rst              |   40 ++--
 block/fops.c                                   |   15 +
 fs/btrfs/btrfs_inode.h                         |    3 
 fs/btrfs/file.c                                |    4 
 fs/btrfs/inode.c                               |   72 -------
 fs/ext4/file.c                                 |    6 
 fs/ext4/inode.c                                |   11 -
 fs/f2fs/data.c                                 |   50 -----
 fs/f2fs/f2fs.h                                 |    2 
 fs/f2fs/file.c                                 |    4 
 fs/iomap/swapfile.c                            |  165 +++---------------
 fs/nfs/direct.c                                |    1 
 fs/nfs/file.c                                  |   21 --
 fs/nfs/nfs4file.c                              |    3 
 fs/ntfs/aops.c                                 |    8 
 fs/ntfs/file.c                                 |    6 
 fs/smb/client/cifsfs.c                         |   18 +
 fs/smb/client/cifsfs.h                         |    3 
 fs/smb/client/file.c                           |   16 -
 fs/xfs/xfs_aops.c                              |   48 -----
 fs/xfs/xfs_file.c                              |   39 ++++
 fs/zonefs/file.c                               |   30 +--
 include/linux/fs.h                             |   11 -
 include/linux/iomap.h                          |    5 
 include/linux/nfs_fs.h                         |    3 
 include/linux/swap.h                           |  129 +-------------
 mm/page_io.c                                   |   45 ----
 mm/swap.h                                      |   92 ++++++++++
 mm/swapfile.c                                  |  227 ++++++++++++++-----------
 31 files changed, 471 insertions(+), 644 deletions(-)

^ permalink raw reply

* Re: [PATCH mm-unstable v17 03/14] mm/khugepaged: rework max_ptes_* handling with helper functions
From: Lance Yang @ 2026-05-12  4:44 UTC (permalink / raw)
  To: npache
  Cc: linux-doc, linux-kernel, linux-mm, linux-trace-kernel, aarcange,
	akpm, anshuman.khandual, apopple, baohua, baolin.wang, byungchul,
	catalin.marinas, cl, corbet, dave.hansen, david, dev.jain, gourry,
	hannes, hughd, jack, jackmanb, jannh, jglisse, joshua.hahnjy, kas,
	lance.yang, liam, ljs, mathieu.desnoyers, matthew.brost, mhiramat,
	mhocko, peterx, pfalcato, rakie.kim, raquini, rdunlap,
	richard.weiyang, rientjes, rostedt, rppt, ryan.roberts, shivankg,
	sunnanyong, surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
	vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang, ziy,
	zokeefe, usama.arif
In-Reply-To: <20260511185817.686831-4-npache@redhat.com>


On Mon, May 11, 2026 at 12:58:03PM -0600, Nico Pache wrote:
>The following cleanup reworks all the max_ptes_* handling into helper
>functions. This increases the code readability and will later be used to
>implement the mTHP handling of these variables.
>
>With these changes we abstract all the madvise_collapse() special casing
>(dont respect the sysctls) away from the functions that utilize them. And

Nit: s/dont/do not/

>will be used later in this series to cleanly restrict the mTHP collapse
>behavior.
>
>No functional change is intended; however, we are now only reading the
>sysfs variables once per scan, whereas before these variables were being
>read on each loop iteration.
>
>Suggested-by: David Hildenbrand <david@kernel.org>
>Acked-by: David Hildenbrand (Arm) <david@kernel.org>
>Acked-by: Usama Arif <usama.arif@linux.dev>
>Signed-off-by: Nico Pache <npache@redhat.com>
>---
> mm/khugepaged.c | 118 +++++++++++++++++++++++++++++++++---------------
> 1 file changed, 82 insertions(+), 36 deletions(-)
>
>diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>index f0e29d5c7b1f..f68853b3caa7 100644
>--- a/mm/khugepaged.c
>+++ b/mm/khugepaged.c
>@@ -348,6 +348,62 @@ static bool pte_none_or_zero(pte_t pte)
> 	return pte_present(pte) && is_zero_pfn(pte_pfn(pte));
> }
> 
>+/**
>+ * collapse_max_ptes_none - Calculate maximum allowed none-page or zero-page
>+ * PTEs for the given collapse operation.
>+ * @cc: The collapse control struct
>+ * @vma: The vma to check for userfaultfd
>+ *
>+ * Return: Maximum number of none-page or zero-page PTEs allowed for the
>+ * collapse operation.
>+ */
>+static unsigned int collapse_max_ptes_none(struct collapse_control *cc,
>+		struct vm_area_struct *vma)
>+{
>+	// If the vma is userfaultfd-armed, allow no none-page or zero-page PTEs.
>+	if (vma && userfaultfd_armed(vma))
>+		return 0;
>+	// for MADV_COLLAPSE, allow any none-page or zero-page PTEs.
>+	if (!cc->is_khugepaged)
>+		return HPAGE_PMD_NR;
>+	// For all other cases repect the user defined maximum.
>+	return khugepaged_max_ptes_none;

Nit: kernel code usually uses C-style comments. This could be:

/* For all other cases, respect the user-defined maximum. */

Also, s/repect/respect/.

>+}
>+
>+/**
>+ * collapse_max_ptes_shared - Calculate maximum allowed PTEs that map shared
>+ * anonymous pages for the given collapse operation.
>+ * @cc: The collapse control struct
>+ *
>+ * Return: Maximum number of PTEs that map shared anonymous pages for the
>+ * collapse operation
>+ */
>+static unsigned int collapse_max_ptes_shared(struct collapse_control *cc)
>+{
>+	// for MADV_COLLAPSE, do not restrict the number of PTEs that map shared
>+	// anonymous pages.

Ditto.

>+	if (!cc->is_khugepaged)
>+		return HPAGE_PMD_NR;
>+	return khugepaged_max_ptes_shared;
>+}
>+
>+/**
>+ * collapse_max_ptes_swap - Calculate the maximum allowed non-present PTEs or the
>+ * maximum allowed non-present pagecache entries for the given collapse operation.
>+ * @cc: The collapse control struct
>+ *
>+ * Return: Maximum number of non-present PTEs or the maximum allowed non-present
>+ * pagecache entries for the collapse operation.
>+ */
>+static unsigned int collapse_max_ptes_swap(struct collapse_control *cc)
>+{
>+	// for MADV_COLLAPSE, do not restrict the number PTEs entries or
>+	// pagecache entries that are non-present.

Same here.

>+	if (!cc->is_khugepaged)
>+		return HPAGE_PMD_NR;
>+	return khugepaged_max_ptes_swap;
>+}
>+
> int hugepage_madvise(struct vm_area_struct *vma,
> 		     vm_flags_t *vm_flags, int advice)
> {
>@@ -546,21 +602,19 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
> 	pte_t *_pte;
> 	int none_or_zero = 0, shared = 0, referenced = 0;
> 	enum scan_result result = SCAN_FAIL;
>+	unsigned int max_ptes_none = collapse_max_ptes_none(cc, vma);
>+	unsigned int max_ptes_shared = collapse_max_ptes_shared(cc);

Nit: could these be const, as David suggested earlier?

Nothing else jumped out at me. LGTM!

Reviewed-by: Lance Yang <lance.yang@linux.dev>

^ permalink raw reply

* [kees:for-next/hardening 1/1] htmldocs: Documentation/driver-api/basics:127: ./include/linux/stddef.h:110: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]
From: kernel test robot @ 2026-05-12  3:06 UTC (permalink / raw)
  To: Gustavo A. R. Silva; +Cc: oe-kbuild-all, Kees Cook, linux-doc

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
head:   3c74955937520e6aabc0ec921b1bfe01734c6abc
commit: 3c74955937520e6aabc0ec921b1bfe01734c6abc [1/1] stddef: Document designated initializer semantics for __TRAILING_OVERLAP()
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
docutils: docutils (Docutils 0.21.2, Python 3.13.5, on linux)
reproduce: (https://download.01.org/0day-ci/archive/20260512/202605120507.9iQRMgKR-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202605120507.9iQRMgKR-lkp@intel.com/

All warnings (new ones prefixed by >>):

   --------------------------------------------------------------------------------------------^
   Documentation/driver-api/basics:42: ./kernel/time/time.c:370: WARNING: Duplicate C declaration, also defined at driver-api/basics:436.
   Declaration is '.. c:function:: unsigned int jiffies_to_msecs (const unsigned long j)'. [duplicate_declaration.c]
   Documentation/driver-api/basics:42: ./kernel/time/time.c:393: WARNING: Duplicate C declaration, also defined at driver-api/basics:453.
   Declaration is '.. c:function:: unsigned int jiffies_to_usecs (const unsigned long j)'. [duplicate_declaration.c]
>> Documentation/driver-api/basics:127: ./include/linux/stddef.h:110: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]
   Documentation/driver-api/basics:127: ./include/linux/stddef.h:115: ERROR: Unexpected indentation. [docutils]
>> Documentation/driver-api/basics:127: ./include/linux/stddef.h:116: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils]
   Documentation/driver-api/basics:127: ./include/linux/stddef.h:117: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]
   Documentation/driver-api/basics:127: ./include/linux/stddef.h:122: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]
   Documentation/driver-api/basics:127: ./include/linux/stddef.h:124: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]
   Documentation/driver-api/basics:127: ./include/linux/stddef.h:139: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]
   Documentation/driver-api/basics:127: ./include/linux/stddef.h:140: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils]

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* Re: [PATCH v4 1/4] kernel: param: initialize module_kset before do_initcalls()
From: Shashank Balaji @ 2026-05-12  2:12 UTC (permalink / raw)
  To: Thierry Reding, Jonathan Hunter
  Cc: Gary Guo, Suzuki K Poulose, James Clark, Alexander Shishkin,
	Maxime Coquelin, Alexandre Torgue, Greg Kroah-Hartman,
	Rafael J. Wysocki, Danilo Krummrich, Miguel Ojeda, Boqun Feng,
	Björn Roy Baron, Benno Lossin, Andreas Hindborg, Alice Ryhl,
	Trevor Gross, Richard Cochran, Jonathan Corbet, Shuah Khan,
	Luis Chamberlain, Petr Pavlu, Daniel Gomez, Sami Tolvanen,
	Aaron Tomlin, Mike Leach, Leo Yan, Rahul Bukte, linux-kernel,
	coresight, linux-arm-kernel, driver-core, rust-for-linux,
	linux-doc, Daniel Palmer, Tim Bird, linux-modules, linux-tegra
In-Reply-To: <afCxHUrjr3Z22U6V@JPC00244420>

Hi Thierry, Jonathan,

Just following up on the below, would moving tegra194_cbb_driver and
tegra234_cbb_driver from pure_initcall to core_initcall work for you?

Thanks,
Shashank

On Tue, Apr 28, 2026 at 10:07:41PM +0900, Shashank Balaji wrote:
> Adding Tegra maintainers.
> 
> On Tue, Apr 28, 2026 at 12:10:50PM +0100, Gary Guo wrote:
> > On Tue Apr 28, 2026 at 1:37 AM BST, Shashank Balaji wrote:
> > > Hi Gary,
> > >
> > > On Mon, Apr 27, 2026 at 02:29:55PM +0100, Gary Guo wrote:
> > >> On Mon Apr 27, 2026 at 3:41 AM BST, Shashank Balaji wrote:
> > >> > module_kset is initialized in param_sysfs_init(), a subsys_initcall. A number
> > >> > of platform drivers register themselves prior to subsys_initcalls
> > >> > (tegra194_cbb_driver registers in a pure_initcall, for example). With an
> > >> > upcoming patch ("driver core: platform: set mod_name in driver registration")
> > >> > that sets their mod_name in struct device_driver, lookup_or_create_module_kobject()
> > >> > will be called for those drivers, which calls kset_find_obj(module_kset, mod_name).
> > >> > This causes a null deref because module_kset isn't alive yet.
> > >> >
> > >> > Fix this by initializing module_kset in do_basic_setup() before do_initcalls().
> > >> > Modernize the pr_warn while we're at it.
> > >> >
> > >> > Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > >> > Suggested-by: Gary Guo <gary@garyguo.net>
> > >> 
> > >> I didn't suggest this change :)
> > >> 
> > >> I suggested `pure_initcall`, which is just a one line change.
> > >
> > > Oops, sorry about the misattribution.
> > >
> > >> diff --git a/kernel/params.c b/kernel/params.c
> > >> index 74d620bc2521..ac088d4b09a9 100644
> > >> --- a/kernel/params.c
> > >> +++ b/kernel/params.c
> > >> @@ -957,7 +957,7 @@ static int __init param_sysfs_init(void)
> > >>  
> > >>  	return 0;
> > >>  }
> > >> -subsys_initcall(param_sysfs_init);
> > >> +pure_initcall(param_sysfs_init);
> > >>  
> > >>  /*
> > >>   * param_sysfs_builtin_init - add sysfs version and parameter
> > >> 
> > >> pure_initcall is level 0 so it happens before all other init calls. Does it not
> > >> work?
> > >
> > > tegra194_cbb_driver registers itself in a pure_initcall too. We wouldn't
> > > want the ordering of its registration and module_kset init to be link order
> > > dependent.
> > 
> > It's the only device driver that does this. And I don't think it's supposed to.
> > 
> > From documentation:
> > 
> > > A "pure" initcall has no dependencies on anything else, and purely
> > > initializes variables that couldn't be statically initialized.
> > 
> > I understand that given the large number of drivers registering themselves during
> > core/arch_initcall there might be regressions if all of them are moved, but
> > surely we can demote these two specific tegra drivers to core/postcore_initcall?
> > This will still be called earlier than init_machine call which happens during
> > arch_initcall.
> > 
> > Looks like the tegra CBB driver is just doing error logging anyway.
> 
> That's a good point, Gary. Thanks!
> 
> Hi Thierry and Jonathan,
> 
> You can find the context for this email in this patch:
> https://lore.kernel.org/all/20260427-acpi_mod_name-v4-1-22b42240c9bf@sony.com/
> 
> TL;DR: tegra194_cbb_driver and tegra234_cbb_driver are the only drivers
> registering themselves as early as in a pure_initcall. This is a problem
> on two fronts:
> 1. Philosophical: As Gary pointed out, pure_initcalls are intended to purely
> initialize variables that couldn't be statically initialized. But these
> are doing driver registrations.
> 2. module_kset not initialized at pure_initcall stage: This is needed to
> set the module sysfs symlink. Since module_kset is not alive yet during
> pure_initcalls, registering these drivers panics the kernel.
> 
> We would like to do the tegra cbb driver registration in a core_initcall
> (or some later initcall works too), and move module_kset initialization
> to a pure_initcall. Like this:
> 
> diff --git a/drivers/soc/tegra/cbb/tegra194-cbb.c b/drivers/soc/tegra/cbb/tegra194-cbb.c
> index ab75d50cc85c..2f69e104c838 100644
> --- a/drivers/soc/tegra/cbb/tegra194-cbb.c
> +++ b/drivers/soc/tegra/cbb/tegra194-cbb.c
> @@ -2342,7 +2342,7 @@ static int __init tegra194_cbb_init(void)
>  {
>         return platform_driver_register(&tegra194_cbb_driver);
>  }
> -pure_initcall(tegra194_cbb_init);
> +core_initcall(tegra194_cbb_init);
> 
>  static void __exit tegra194_cbb_exit(void)
>  {
> diff --git a/drivers/soc/tegra/cbb/tegra234-cbb.c b/drivers/soc/tegra/cbb/tegra234-cbb.c
> index fb26f085f691..785072fa4e85 100644
> --- a/drivers/soc/tegra/cbb/tegra234-cbb.c
> +++ b/drivers/soc/tegra/cbb/tegra234-cbb.c
> @@ -1774,7 +1774,7 @@ static int __init tegra234_cbb_init(void)
>  {
>         return platform_driver_register(&tegra234_cbb_driver);
>  }
> -pure_initcall(tegra234_cbb_init);
> +core_initcall(tegra234_cbb_init);
> 
>  static void __exit tegra234_cbb_exit(void)
>  {
> 
> Would this work?
> 
> Thanks,
> Shashank
> 

^ permalink raw reply

* Re: [PATCH] docs: reporting-issues: fix advice wording
From: Randy Dunlap @ 2026-05-12  2:03 UTC (permalink / raw)
  To: Chen-Shi-Hong, linux; +Cc: corbet, skhan, linux-doc, linux-kernel
In-Reply-To: <20260512015146.4081-1-eric039eric@gmail.com>



On 5/11/26 6:51 PM, Chen-Shi-Hong wrote:
> Replace "these advices" with "this advice" in
> Documentation/admin-guide/reporting-issues.rst.
> 
> Signed-off-by: Chen-Shi-Hong <eric039eric@gmail.com>

Acked-by: Randy Dunlap <rdunlap@infradead.org>
Thanks.

> ---
>  Documentation/admin-guide/reporting-issues.rst | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
> index 16a66a1f1975..731865b5e8ff 100644
> --- a/Documentation/admin-guide/reporting-issues.rst
> +++ b/Documentation/admin-guide/reporting-issues.rst
> @@ -129,7 +129,7 @@ After these preparations you'll now enter the main part:
>     situations; during the merge window that actually might be even the best
>     approach, but in that development phase it can be an even better idea to
>     suspend your efforts for a few days anyway. Whatever version you choose,
> -   ideally use a 'vanilla' build. Ignoring these advices will dramatically
> +   ideally use a 'vanilla' build. Ignoring this advice will dramatically
>     increase the risk your report will be rejected or ignored.
>  
>   * Ensure the kernel you just installed does not 'taint' itself when
> @@ -795,7 +795,7 @@ Install a fresh kernel for testing
>      situations; during the merge window that actually might be even the best
>      approach, but in that development phase it can be an even better idea to
>      suspend your efforts for a few days anyway. Whatever version you choose,
> -    ideally use a 'vanilla' built. Ignoring these advices will dramatically
> +    ideally use a 'vanilla' built. Ignoring this advice will dramatically
>      increase the risk your report will be rejected or ignored.*
>  
>  As mentioned in the detailed explanation for the first step already: Like most
> 
> base-commit: 5d6919055dec134de3c40167a490f33c74c12581
> prerequisite-patch-id: 1089bde9e188a84c873ff722a776bc107a6e8103

-- 
~Randy

^ permalink raw reply

* Re: [PATCH RFC v3 0/3] Add splash DRM client
From: Mario Limonciello @ 2026-05-12  1:59 UTC (permalink / raw)
  To: Francesco Valla, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Jocelyn Falempe, Javier Martinez Canillas, Shuah Khan
  Cc: Sam Ravnborg, linux-kernel, dri-devel, linux-doc, linux-embedded
In-Reply-To: <20260510-drm_client_splash-v3-0-a9aee9f0b2fc@valla.it>



On 5/10/26 16:29, Francesco Valla wrote:
> Hello,
> 
> this is the third (and hopefully last) RFC version for the DRM-based
> splash screen.
> 
> Motivation behind the work can be found in v1 [0]; in a nutshell, the
> splash DRM client can draw a splashscreen using:
> 
>    - the BMP image supplied by the EFI BGRT;
>    - a BMP image loaded as firmware (either built-in or loaded from the
>      filesystem);
>    - a colored background.
> 
> This revision greatly simplifies the image selection logic; now the EFI
> BGRT is always used as first source if enabled, with a fallback to BMP
> image loaded as firmware and then to a plain color.
> 
> Sanity checks on the EFI BGRT image have been borrowed from the efifb
> driver. More complete splash providers (e.g.: Plymouth) have an
> extensive management of platform-specific quirks, but I don't think it
> would be reasonable to introduce such complexity here.
> 
> Additional notes:
>    - Rotation is still not managed (and probably won't?).
>    - Support for tiled screens is untested.
>    - Plain color and BMP sources were tested on QEMU, Beagleplay and
>      i.MX93 FRDM.
>    - EFI BGRT support was tested using QEMU+OVMF.
> 
> Thank you in advance for any feedback.

Unfortunately I found that I couldn't compile with my normal Kconfig.

ERROR: modpost: "bgrt_tab" [drivers/gpu/drm/clients/drm_client_lib.ko] 
undefined!
ERROR: modpost: "bgrt_image_size" 
[drivers/gpu/drm/clients/drm_client_lib.ko] undefined!
make[2]: *** [scripts/Makefile.modpost:147: Module.symvers] Error 1
make[1]: *** [/home/supermario/src/linux/Makefile:2091: modpost] Error 2
make: *** [Makefile:248: __sub-make] Error 2

❮ grep ^CONFIG_DRM .config
CONFIG_DRM=y
CONFIG_DRM_KMS_HELPER=m
CONFIG_DRM_DRAW=y
CONFIG_DRM_CLIENT=y
CONFIG_DRM_CLIENT_LIB=m
CONFIG_DRM_CLIENT_SELECTION=m
CONFIG_DRM_CLIENT_SETUP=y
CONFIG_DRM_FBDEV_EMULATION=y
CONFIG_DRM_FBDEV_OVERALLOC=100
CONFIG_DRM_CLIENT_SPLASH=y
CONFIG_DRM_CLIENT_SPLASH_BACKGROUND_COLOR=0x000000
CONFIG_DRM_CLIENT_SPLASH_SRC_BGRT=y
CONFIG_DRM_CLIENT_SPLASH_BMP_SUPPORT=y
CONFIG_DRM_CLIENT_DEFAULT_SPLASH=y
CONFIG_DRM_CLIENT_DEFAULT="splash"
CONFIG_DRM_LOAD_EDID_FIRMWARE=y
CONFIG_DRM_DISPLAY_HELPER=m
CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV=y
CONFIG_DRM_DISPLAY_DP_HELPER=y
CONFIG_DRM_DISPLAY_DSC_HELPER=y
CONFIG_DRM_DISPLAY_HDCP_HELPER=y
CONFIG_DRM_DISPLAY_HDMI_CEC_NOTIFIER_HELPER=y
CONFIG_DRM_DISPLAY_HDMI_HELPER=y
CONFIG_DRM_TTM=m
CONFIG_DRM_EXEC=m
CONFIG_DRM_BUDDY=m
CONFIG_DRM_TTM_HELPER=m
CONFIG_DRM_GEM_SHMEM_HELPER=m
CONFIG_DRM_SUBALLOC_HELPER=m
CONFIG_DRM_SCHED=m
CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS=m
CONFIG_DRM_PRIVACY_SCREEN=y
CONFIG_DRM_AMDGPU=m
CONFIG_DRM_AMDGPU_CIK=y
CONFIG_DRM_AMDGPU_USERPTR=y
CONFIG_DRM_AMD_ISP=y
CONFIG_DRM_AMD_ACP=y
CONFIG_DRM_AMD_DC=y
CONFIG_DRM_AMD_DC_FP=y
CONFIG_DRM_AMD_SECURE_DISPLAY=y
CONFIG_DRM_BRIDGE=y
CONFIG_DRM_PANEL_BRIDGE=y
CONFIG_DRM_PANEL=y
CONFIG_DRM_SYSFB_HELPER=m
CONFIG_DRM_SIMPLEDRM=m
CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y
CONFIG_DRM_ACCEL=y
CONFIG_DRM_ACCEL_AMDXDNA=m
CONFIG_DRM_ACCEL_HABANALABS=m
CONFIG_DRM_ACCEL_IVPU=m
CONFIG_DRM_ACCEL_QAIC=m
❮ grep BGRT .config
CONFIG_ACPI_BGRT=y
CONFIG_DRM_CLIENT_SPLASH_SRC_BGRT=y

> 
> Best regards,
> Francesco
> 
> [0] https://lore.kernel.org/all/20251027-drm_client_splash-v1-0-00698933b34a@valla.it
> 
> Signed-off-by: Francesco Valla <francesco@valla.it>
> ---
> Changes in v3:
>    - Simplified the image selection and management logic, with direct
>      fallback from EFI BGRT to BMP as firmware
>    - Used new drm_draw_can_convert_from_xrgb8888() API
>    - Added proper get_unaligned_ calls for EFI BGRT access
>    - Fixed Kconfig dependencies
>    - Link to v2: https://lore.kernel.org/r/20260106-drm_client_splash-v2-0-6e86a7434b59@valla.it
> 
> Changes in v2:
>    - Moved from raw dump to BMP format for static image source
>    - Removed support for configurable message
>    - Removed support for progress bar
>    - Added EFI BGRT as image source
> Link to v1: https://lore.kernel.org/r/20251027-drm_client_splash-v1-0-00698933b34a@valla.it
> 
> To: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> To: Maxime Ripard <mripard@kernel.org>
> To: Thomas Zimmermann <tzimmermann@suse.de>
> To: David Airlie <airlied@gmail.com>
> To: Simona Vetter <simona@ffwll.ch>
> To: Francesco Valla <francesco@valla.it>
> To: Jonathan Corbet <corbet@lwn.net>
> To: Shuah Khan <skhan@linuxfoundation.org>
> Cc: linux-kernel@vger.kernel.org
> Cc: dri-devel@lists.freedesktop.org
> Cc: linux-doc@vger.kernel.org
> 
> ---
> Francesco Valla (3):
>        drm: client: add splash client
>        MAINTAINERS: add entry for DRM splash client
>        drm: docs: remove bootsplash from TODO
> 
>   Documentation/gpu/todo.rst                    |  17 -
>   MAINTAINERS                                   |   7 +
>   drivers/gpu/drm/clients/Kconfig               |  63 +-
>   drivers/gpu/drm/clients/Makefile              |   1 +
>   drivers/gpu/drm/clients/drm_client_internal.h |   9 +
>   drivers/gpu/drm/clients/drm_client_setup.c    |   8 +
>   drivers/gpu/drm/clients/drm_splash.c          | 932 ++++++++++++++++++++++++++
>   7 files changed, 1019 insertions(+), 18 deletions(-)
> ---
> base-commit: afaa0a477099cb7256e26fe11289c753a225ac97
> change-id: 20251026-drm_client_splash-e10d7d663e7f
> 
> Best regards,
> --
> Francesco Valla <francesco@valla.it>
> 


^ permalink raw reply

* [PATCH] docs: reporting-issues: fix advice wording
From: Chen-Shi-Hong @ 2026-05-12  1:51 UTC (permalink / raw)
  To: linux; +Cc: corbet, skhan, linux-doc, linux-kernel, Chen-Shi-Hong

Replace "these advices" with "this advice" in
Documentation/admin-guide/reporting-issues.rst.

Signed-off-by: Chen-Shi-Hong <eric039eric@gmail.com>
---
 Documentation/admin-guide/reporting-issues.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index 16a66a1f1975..731865b5e8ff 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -129,7 +129,7 @@ After these preparations you'll now enter the main part:
    situations; during the merge window that actually might be even the best
    approach, but in that development phase it can be an even better idea to
    suspend your efforts for a few days anyway. Whatever version you choose,
-   ideally use a 'vanilla' build. Ignoring these advices will dramatically
+   ideally use a 'vanilla' build. Ignoring this advice will dramatically
    increase the risk your report will be rejected or ignored.
 
  * Ensure the kernel you just installed does not 'taint' itself when
@@ -795,7 +795,7 @@ Install a fresh kernel for testing
     situations; during the merge window that actually might be even the best
     approach, but in that development phase it can be an even better idea to
     suspend your efforts for a few days anyway. Whatever version you choose,
-    ideally use a 'vanilla' built. Ignoring these advices will dramatically
+    ideally use a 'vanilla' built. Ignoring this advice will dramatically
     increase the risk your report will be rejected or ignored.*
 
 As mentioned in the detailed explanation for the first step already: Like most

base-commit: 5d6919055dec134de3c40167a490f33c74c12581
prerequisite-patch-id: 1089bde9e188a84c873ff722a776bc107a6e8103
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH] docs/zh_CN: update admin-guide/index.rst translation
From: Dongliang Mu @ 2026-05-12  1:46 UTC (permalink / raw)
  To: kernel test robot
  Cc: Yan Zhu, corbet, alexs, si.yanteng, kees, oe-kbuild-all, skhan,
	dzm91, tony.luck, gpiccoli, frederic, jani.nikula, longman,
	mchehab+huawei, linux-doc, linux-kernel
In-Reply-To: <202605111009.hlpiVkT6-lkp@intel.com>

On Mon, May 11, 2026 at 4:53 PM kernel test robot <lkp@intel.com> wrote:
>
> Hi Yan,
>

Hi Yan,

Please take a look at this testing report and verify if it is correct
or false alarm.

Dongliang Mu

> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on lwn/docs-next]
> [also build test WARNING on linus/master v7.1-rc3 next-20260508]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Yan-Zhu/docs-zh_CN-update-admin-guide-index-rst-translation/20260511-102406
> base:   git://git.lwn.net/linux.git docs-next
> patch link:    https://lore.kernel.org/r/tencent_7ADF2D1EBD8EAD2028BC93BA7858EA655D0A%40qq.com
> patch subject: [PATCH] docs/zh_CN: update admin-guide/index.rst translation
> compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
> docutils: docutils (Docutils 0.21.2, Python 3.13.5, on linux)
> reproduce: (https://download.01.org/0day-ci/archive/20260511/202605111009.hlpiVkT6-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202605111009.hlpiVkT6-lkp@intel.com/
>
> All warnings (new ones prefixed by >>):
>
>    Checksumming on output with GSO
>    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [docutils]
>    MAINTAINERS:40: WARNING: Inline strong start-string without end-string. [docutils]
> >> Documentation/translations/zh_CN/admin-guide/index.rst:114: WARNING: toctree contains reference to nonexisting document 'translations/zh_CN/admin-guide/module-signing' [toc.not_readable]
>    Documentation/userspace-api/landlock:504: ./security/landlock/errata/abi-4.h:5: ERROR: Unexpected section title.
>
>
> vim +114 Documentation/translations/zh_CN/admin-guide/index.rst
>
>    113
>  > 114  .. toctree::
>    115     :maxdepth: 1
>    116
>    117     cpu-load
>    118     mm/index
>    119     module-signing
>    120     numastat
>    121
>    122
>    123  Todolist:
>    124
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
>

^ permalink raw reply

* [PATCH 2/2] hwmon: (pmbus/d1u74t) Add Murata D1U74T PSU driver
From: Abdurrahman Hussain @ 2026-05-12  1:44 UTC (permalink / raw)
  To: Guenter Roeck, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Jonathan Corbet, Shuah Khan
  Cc: linux-hwmon, devicetree, linux-kernel, linux-doc,
	Abdurrahman Hussain
In-Reply-To: <20260511-d1u74t-v1-0-623c2bc1532a@nexthop.ai>

Add PMBUS driver for Murata D1U74T power supplies.

Signed-off-by: Abdurrahman Hussain <abdurrahman@nexthop.ai>
---
 Documentation/hwmon/d1u74t.rst | 97 ++++++++++++++++++++++++++++++++++++++++++
 Documentation/hwmon/index.rst  |  1 +
 MAINTAINERS                    |  2 +
 drivers/hwmon/pmbus/Kconfig    |  9 ++++
 drivers/hwmon/pmbus/Makefile   |  1 +
 drivers/hwmon/pmbus/d1u74t.c   | 85 ++++++++++++++++++++++++++++++++++++
 6 files changed, 195 insertions(+)

diff --git a/Documentation/hwmon/d1u74t.rst b/Documentation/hwmon/d1u74t.rst
new file mode 100644
index 000000000000..f7786ebba0b4
--- /dev/null
+++ b/Documentation/hwmon/d1u74t.rst
@@ -0,0 +1,97 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver d1u74t
+====================
+
+Supported chips:
+
+  * Murata D1U74T
+
+    Prefix: 'd1u74t'
+
+    Addresses scanned: -
+
+    Datasheet: Only available under NDA.
+
+Authors:
+    Abdurrahman Hussain <abdurrahman@nexthop.ai>
+
+
+Description
+-----------
+
+This driver implements support for Murata D1U74T Power Supply with
+PMBus support.
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus.rst for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
+details.
+
+
+Sysfs entries
+-------------
+
+======================= ======================================================
+curr1_label		"iin"
+curr1_input		Measured input current
+curr1_max		Maximum input current
+curr1_max_alarm		Input maximum current high alarm
+curr1_crit		Critical high input current
+curr1_crit_alarm	Input critical current high alarm
+curr1_rated_max		Maximum rated input current
+
+curr2_label		"iout1"
+curr2_input		Measured output current
+curr2_max		Maximum output current
+curr2_max_alarm		Output maximum current high alarm
+curr2_crit		Critical high output current
+curr2_crit_alarm	Output critical current high alarm
+curr2_rated_max		Maximum rated output current
+
+in1_label		"vin"
+in1_input		Measured input voltage
+in1_crit		Critical input over voltage
+in1_crit_alarm		Critical input over voltage alarm
+in1_max			Maximum input over voltage
+in1_max_alarm		Maximum input over voltage alarm
+in1_rated_min		Minimum rated input voltage
+in1_rated_max		Maximum rated input voltage
+
+in2_label		"vout1"
+in2_input		Measured output voltage
+in2_crit		Critical output over voltage
+in2_crit_alarm		Critical output over voltage alarm
+in2_lcrit		Critical output under voltage fault
+in2_lcrit_alarm		Critical output under voltage fault alarm
+in2_max			Maximum output over voltage
+in2_max_alarm		Maximum output over voltage alarm
+in2_min			Minimum output under voltage warning
+in2_min_alarm		Minimum output under voltage warning alarm
+in2_rated_min		Minimum rated output voltage
+in2_rated_max		Maximum rated output voltage
+
+power1_label		"pin"
+power1_input		Measured input power
+power1_alarm		Input power high alarm
+power1_max  		Maximum input power
+power1_rated_max	Maximum rated input power
+
+temp[1-3]_input		Measured temperature
+temp[1-3]_crit 		Critical temperature
+temp[1-3]_crit_alarm	Critical temperature alarm
+temp[1-3]_max		Maximum temperature
+temp[1-3]_max_alarm	Maximum temperature alarm
+temp[1-3]_rated_max	Maximum rated temperature
+
+fan1_alarm		Fan 1 warning.
+fan1_fault		Fan 1 fault.
+fan1_input		Fan 1 speed in RPM.
+fan1_target		Fan 1 target.
+======================= ======================================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8b655e5d6b68..97b1ef65b1c1 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -60,6 +60,7 @@ Hardware Monitoring Kernel Drivers
    corsair-psu
    cros_ec_hwmon
    crps
+   d1u74t
    da9052
    da9055
    dell-smm-hwmon
diff --git a/MAINTAINERS b/MAINTAINERS
index b6a055fbb870..8f443a2f0e45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6808,6 +6808,8 @@ M:	Abdurrahman Hussain <abdurrahman@nexthop.ai>
 L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/hwmon/pmbus/murata,d1u74t.yaml
+F:	Documentation/hwmon/d1u74t.rst
+F:	drivers/hwmon/pmbus/d1u74t.c
 
 CRYPTO API
 M:	Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 8f4bff375ecb..ee93b22d2887 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -113,6 +113,15 @@ config SENSORS_CRPS
 	  This driver can also be built as a module. If so, the module will
 	  be called crps.
 
+config SENSORS_D1U74T
+	tristate "Murata D1U74T Power Supply"
+	help
+	  If you say yes here you get hardware monitoring support for the Murata
+	  D1U74T Power Supply.
+
+	  This driver can also be built as a module. If so, the module will
+	  be called d1u74t.
+
 config SENSORS_DELTA_AHE50DC_FAN
 	tristate "Delta AHE-50DC fan control module"
 	help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index 7129b62bc00f..8cf7d3075371 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -76,3 +76,4 @@ obj-$(CONFIG_SENSORS_XDPE1A2G7B)	+= xdpe1a2g7b.o
 obj-$(CONFIG_SENSORS_ZL6100)	+= zl6100.o
 obj-$(CONFIG_SENSORS_PIM4328)	+= pim4328.o
 obj-$(CONFIG_SENSORS_CRPS)	+= crps.o
+obj-$(CONFIG_SENSORS_D1U74T)	+= d1u74t.o
diff --git a/drivers/hwmon/pmbus/d1u74t.c b/drivers/hwmon/pmbus/d1u74t.c
new file mode 100644
index 000000000000..3127e0e5a23d
--- /dev/null
+++ b/drivers/hwmon/pmbus/d1u74t.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2026 Nexthop Systems.
+ */
+
+#include <linux/i2c.h>
+#include <linux/of.h>
+#include <linux/pmbus.h>
+
+#include "pmbus.h"
+
+static const struct i2c_device_id d1u74t_id[] = {
+	{ "d1u74t" },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, d1u74t_id);
+
+static struct pmbus_driver_info d1u74t_info = {
+	.pages = 1,
+	/* PSU uses default linear data format. */
+	.func[0] = PMBUS_HAVE_PIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
+		   PMBUS_HAVE_IIN | PMBUS_HAVE_VIN | PMBUS_HAVE_STATUS_INPUT |
+		   PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_TEMP |
+		   PMBUS_HAVE_TEMP2 | PMBUS_HAVE_TEMP3 |
+		   PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_FAN12 |
+		   PMBUS_HAVE_STATUS_FAN12,
+};
+
+static int d1u74t_probe(struct i2c_client *client)
+{
+	char buf[I2C_SMBUS_BLOCK_MAX + 2] = { 0 };
+	struct device *dev = &client->dev;
+	int rc;
+
+	rc = i2c_smbus_read_block_data(client, PMBUS_MFR_ID, buf);
+	if (rc < 0)
+		return dev_err_probe(dev, rc, "Failed to read PMBUS_MFR_ID\n");
+
+	if (rc != 9 || strncmp(buf, "Murata-PS", 9)) {
+		buf[rc] = '\0';
+		dev_err(dev, "Unsupported Manufacturer ID '%s'\n", buf);
+		return -ENODEV;
+	}
+
+	rc = i2c_smbus_read_block_data(client, PMBUS_MFR_MODEL, buf);
+	if (rc < 0)
+		return dev_err_probe(dev, rc,
+				     "Failed to read PMBUS_MFR_MODEL\n");
+
+	if (strncmp(buf, "D1U74T-W", 8)) {
+		buf[rc] = '\0';
+		return dev_err_probe(dev, -ENODEV, "Model '%s' not supported\n",
+				     buf);
+	}
+
+	rc = pmbus_do_probe(client, &d1u74t_info);
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to probe\n");
+
+	return 0;
+}
+
+static const struct of_device_id d1u74t_of_match[] = {
+	{
+		.compatible = "murata,d1u74t",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, d1u74t_of_match);
+
+static struct i2c_driver d1u74t_driver = {
+	.driver = {
+		.name = "d1u74t",
+		.of_match_table = d1u74t_of_match,
+	},
+	.probe = d1u74t_probe,
+	.id_table = d1u74t_id,
+};
+
+module_i2c_driver(d1u74t_driver);
+
+MODULE_AUTHOR("Abdurrahman Hussain");
+MODULE_DESCRIPTION("PMBus driver for Murata D1U74T-W power supplies");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("PMBUS");

-- 
2.53.0


^ permalink raw reply related

* [PATCH 1/2] dt-bindings: hwmon: pmbus: Add Murata D1U74T PSU
From: Abdurrahman Hussain @ 2026-05-12  1:44 UTC (permalink / raw)
  To: Guenter Roeck, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Jonathan Corbet, Shuah Khan
  Cc: linux-hwmon, devicetree, linux-kernel, linux-doc,
	Abdurrahman Hussain
In-Reply-To: <20260511-d1u74t-v1-0-623c2bc1532a@nexthop.ai>

Add devicetree binding for the Murata D1U74T-W PMBus power supply
unit.

Signed-off-by: Abdurrahman Hussain <abdurrahman@nexthop.ai>
---
 .../bindings/hwmon/pmbus/murata,d1u74t.yaml        | 41 ++++++++++++++++++++++
 MAINTAINERS                                        |  6 ++++
 2 files changed, 47 insertions(+)

diff --git a/Documentation/devicetree/bindings/hwmon/pmbus/murata,d1u74t.yaml b/Documentation/devicetree/bindings/hwmon/pmbus/murata,d1u74t.yaml
new file mode 100644
index 000000000000..ef080283bf79
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/pmbus/murata,d1u74t.yaml
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/pmbus/murata,d1u74t.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Murata D1U74T-W power supply unit
+
+maintainers:
+  - Abdurrahman Hussain <abdurrahman@nexthop.ai>
+
+description:
+  The Murata D1U74T-W is a PMBus-compliant AC/DC power supply unit that
+  exposes input/output voltage, current and power telemetry, three
+  temperature sensors and dual fan monitoring over an I2C interface.
+
+properties:
+  compatible:
+    enum:
+      - murata,d1u74t
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        psu@58 {
+            compatible = "murata,d1u74t";
+            reg = <0x58>;
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index b2040011a386..b6a055fbb870 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6803,6 +6803,12 @@ S:	Maintained
 F:	Documentation/hwmon/crps.rst
 F:	drivers/hwmon/pmbus/crps.c
 
+MURATA D1U74T PSU DRIVER
+M:	Abdurrahman Hussain <abdurrahman@nexthop.ai>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/hwmon/pmbus/murata,d1u74t.yaml
+
 CRYPTO API
 M:	Herbert Xu <herbert@gondor.apana.org.au>
 M:	"David S. Miller" <davem@davemloft.net>

-- 
2.53.0


^ permalink raw reply related

* [PATCH 0/2] hwmon: Add Murata D1U74T-W PSU driver
From: Abdurrahman Hussain @ 2026-05-12  1:44 UTC (permalink / raw)
  To: Guenter Roeck, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Jonathan Corbet, Shuah Khan
  Cc: linux-hwmon, devicetree, linux-kernel, linux-doc,
	Abdurrahman Hussain

This series adds a PMBus driver for the Murata D1U74T-W AC/DC power
supply unit, used in some Open Compute Project platforms.

The PSU is PMBus-compliant and uses the linear data format. The driver
exposes:

  - input/output voltage, current and power telemetry,
  - three temperature sensors,
  - dual fan tachometer monitoring,

through the standard hwmon/pmbus sysfs interface. Probe verifies the
PMBUS_MFR_ID and PMBUS_MFR_MODEL fields before binding so the driver
only attaches to actual D1U74T-W hardware.

Patch 1 adds the devicetree binding.
Patch 2 adds the driver, hwmon documentation, Kconfig/Makefile entries
and MAINTAINERS section.

Signed-off-by: Abdurrahman Hussain <abdurrahman@nexthop.ai>
---
Abdurrahman Hussain (2):
      dt-bindings: hwmon: pmbus: Add Murata D1U74T PSU
      hwmon: (pmbus/d1u74t) Add Murata D1U74T PSU driver

 .../bindings/hwmon/pmbus/murata,d1u74t.yaml        | 41 +++++++++
 Documentation/hwmon/d1u74t.rst                     | 97 ++++++++++++++++++++++
 Documentation/hwmon/index.rst                      |  1 +
 MAINTAINERS                                        |  8 ++
 drivers/hwmon/pmbus/Kconfig                        |  9 ++
 drivers/hwmon/pmbus/Makefile                       |  1 +
 drivers/hwmon/pmbus/d1u74t.c                       | 85 +++++++++++++++++++
 7 files changed, 242 insertions(+)
---
base-commit: 5d6919055dec134de3c40167a490f33c74c12581
change-id: 20260511-d1u74t-c0cba8f1c344

Best regards,
--  
Abdurrahman Hussain <abdurrahman@nexthop.ai>


^ permalink raw reply

* [PATCH v3 4/4] clk: test: convert constants to use HZ_PER_MHZ
From: Brian Masney @ 2026-05-12  1:35 UTC (permalink / raw)
  To: Michael Turquette, Stephen Boyd, Maxime Ripard, Jonathan Corbet,
	Shuah Khan
  Cc: linux-clk, linux-kernel, linux-doc, Brian Masney
In-Reply-To: <20260511-clk-docs-v3-0-ed67e1065809@redhat.com>

Convert the DUMMY_CLOCK_* constants over to use HZ_PER_MHZ.

Signed-off-by: Brian Masney <bmasney@redhat.com>
---
 drivers/clk/clk_test.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk_test.c b/drivers/clk/clk_test.c
index b1961daac5e2..497db4e54d03 100644
--- a/drivers/clk/clk_test.c
+++ b/drivers/clk/clk_test.c
@@ -7,6 +7,7 @@
 #include <linux/clk/clk-conf.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/units.h>
 
 /* Needed for clk_hw_get_clk() */
 #include "clk.h"
@@ -21,9 +22,9 @@
 
 static const struct clk_ops empty_clk_ops = { };
 
-#define DUMMY_CLOCK_INIT_RATE	(42 * 1000 * 1000)
-#define DUMMY_CLOCK_RATE_1	(142 * 1000 * 1000)
-#define DUMMY_CLOCK_RATE_2	(242 * 1000 * 1000)
+#define DUMMY_CLOCK_INIT_RATE		(42 * HZ_PER_MHZ)
+#define DUMMY_CLOCK_RATE_1		(142 * HZ_PER_MHZ)
+#define DUMMY_CLOCK_RATE_2		(242 * HZ_PER_MHZ)
 
 struct clk_dummy_context {
 	struct clk_hw hw;

-- 
2.54.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox