Linux filesystem development
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: <linux-fsdevel@vger.kernel.org>
Cc: <linux-mm@kvack.org>, Matthew Wilcox <willy@infradead.org>,
	Jan Kara <jack@suse.cz>
Subject: [PATCH 2/4] fs: Basic infrastructure for offloading inode reclaim
Date: Wed, 29 Apr 2026 20:00:52 +0200	[thread overview]
Message-ID: <20260429180056.29598-6-jack@suse.cz> (raw)
In-Reply-To: <20260429174850.18223-1-jack@suse.cz>

Reclaim of some inodes is rather complex, requiring running transactions
or doing other IO. Consequently, filesystems end up doing GFP_NOFAIL
allocations from kswapd or even direct reclaim, which is problematic
because forward progress of these allocations isn't guaranteed. Add
infrastructure for marking inodes whose reclaim is difficult and offload
reclaim of such inodes into a workqueue to not block kswapd with
difficult inode reclaim.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/inode.c                     | 89 +++++++++++++++++++++++++++++++---
 fs/super.c                     |  5 ++
 include/linux/fs.h             |  5 +-
 include/linux/fs/super_types.h |  7 +++
 4 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 276debcd3e20..448e3d7ee48e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -938,6 +938,11 @@ void evict_inodes(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(evict_inodes);
 
+struct inodes_to_prune {
+	struct list_head freeable;	/* disposed of by prune_icache_sb() itself */
+	struct list_head deferred;	/* spliced onto the sb's deferred reclaim list */
+};
+
 /*
  * Isolate the inode from the LRU in preparation for freeing it.
  *
@@ -952,7 +957,7 @@ EXPORT_SYMBOL_GPL(evict_inodes);
 static enum lru_status inode_lru_isolate(struct list_head *item,
 		struct list_lru_one *lru, void *arg)
 {
-	struct list_head *freeable = arg;
+	struct inodes_to_prune *lists = arg;
 	struct inode	*inode = container_of(item, struct inode, i_lru);
 
 	/*
@@ -969,7 +974,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 	 * sync, or the last page cache deletion will requeue them.
 	 */
 	if (icount_read(inode) ||
-	    (inode_state_read(inode) & ~I_REFERENCED) ||
+	    inode_state_read(inode) & ~(I_REFERENCED | I_DEFER_RECLAIM) ||
 	    !mapping_shrinkable(&inode->i_data)) {
 		list_lru_isolate(lru, &inode->i_lru);
 		spin_unlock(&inode->i_lock);
@@ -1007,7 +1012,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 
 	WARN_ON(inode_state_read(inode) & I_NEW);
 	inode_state_set(inode, I_FREEING);
-	list_lru_isolate_move(lru, &inode->i_lru, freeable);
+	/* Inode will take a long time to clean up. Offload that to a worker. */
+	if (inode_state_read(inode) & I_DEFER_RECLAIM)
+		list_lru_isolate_move(lru, &inode->i_lru, &lists->deferred);
+	else
+		list_lru_isolate_move(lru, &inode->i_lru, &lists->freeable);
 	spin_unlock(&inode->i_lock);
 
 	this_cpu_dec(nr_unused);
@@ -1022,15 +1031,83 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
  */
 long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
 {
-	LIST_HEAD(freeable);
+	struct inode_deferred_reclaim *reclaim;
+	struct inodes_to_prune lists;
 	long freed;
 
+	INIT_LIST_HEAD(&lists.freeable);
+	INIT_LIST_HEAD(&lists.deferred);
 	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
-				     inode_lru_isolate, &freeable);
-	dispose_list(&freeable);
+				     inode_lru_isolate, &lists);
+	dispose_list(&lists.freeable);
+	if (list_empty(&lists.deferred))
+		return freed;
+
+	reclaim = READ_ONCE(sb->s_inode_reclaim);
+	if (WARN_ON_ONCE(!reclaim)) {
+		/* Nowhere to hand the inodes over to: dispose of them here */
+		dispose_list(&lists.deferred);
+		return freed;
+	}
+	spin_lock(&reclaim->lock);
+	if (list_empty(&reclaim->list))
+		queue_work(system_dfl_wq, &reclaim->work);
+	list_splice_tail(&lists.deferred, &reclaim->list);
+	spin_unlock(&reclaim->lock);
 	return freed;
 }
 
+static void inode_reclaim_deferred(struct work_struct *work)
+{
+	struct inode_deferred_reclaim *reclaim =
+		container_of(work, struct inode_deferred_reclaim, work);
+	struct inode *victim;
+
+	spin_lock(&reclaim->lock);
+	while ((victim = list_first_entry_or_null(&reclaim->list,
+						  struct inode, i_lru))) {
+		list_del_init(&victim->i_lru);
+		spin_unlock(&reclaim->lock);	/* evict() runs unlocked */
+		evict(victim);
+		cond_resched();
+		spin_lock(&reclaim->lock);
+	}
+	spin_unlock(&reclaim->lock);
+}
+
+static struct inode_deferred_reclaim *inode_deferred_reclaim_alloc(
+							struct super_block *sb)
+{
+	struct inode_deferred_reclaim *reclaim, *installed;
+
+	reclaim = kzalloc_obj(*reclaim, GFP_KERNEL | __GFP_NOFAIL);
+	spin_lock_init(&reclaim->lock);
+	INIT_LIST_HEAD(&reclaim->list);
+	INIT_WORK(&reclaim->work, inode_reclaim_deferred);
+	/* Publish ours unless a racing caller already installed one */
+	installed = cmpxchg(&sb->s_inode_reclaim, NULL, reclaim);
+	if (installed)
+		kfree(reclaim);
+	return installed ?: reclaim;
+}
+
+/* Mark @inode so that the shrinker defers its eviction to the sb worker. */
+void mark_inode_reclaim_deferred(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (inode_state_read_once(inode) & I_DEFER_RECLAIM)
+		return;
+
+	if (!READ_ONCE(sb->s_inode_reclaim))
+		inode_deferred_reclaim_alloc(sb);
+
+	spin_lock(&inode->i_lock);
+	inode_state_set(inode, I_DEFER_RECLAIM);
+	spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(mark_inode_reclaim_deferred);
+
 static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked);
 
 /*
diff --git a/fs/super.c b/fs/super.c
index 378e81efe643..c35bfb3f7785 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -645,6 +645,11 @@ void generic_shutdown_super(struct super_block *sb)
 		if (sop->put_super)
 			sop->put_super(sb);
 
+		if (sb->s_inode_reclaim) {
+			cancel_work_sync(&sb->s_inode_reclaim->work);
+			kfree(sb->s_inode_reclaim);
+		}
+
 		/*
 		 * Now that all potentially-encrypted inodes have been evicted,
 		 * the fscrypt keyring can be destroyed.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 11559c513dfb..2a20cbffc87c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -745,7 +745,8 @@ enum inode_state_flags_enum {
 	I_CREATING		= (1U << 15),
 	I_DONTCACHE		= (1U << 16),
 	I_SYNC_QUEUED		= (1U << 17),
-	I_PINNING_NETFS_WB	= (1U << 18)
+	I_PINNING_NETFS_WB	= (1U << 18),
+	I_DEFER_RECLAIM		= (1U << 19),
 };
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
@@ -2218,6 +2219,8 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
 	__mark_inode_dirty(inode, I_DIRTY_SYNC);
 }
 
+void mark_inode_reclaim_deferred(struct inode *inode);
+
 static inline int icount_read(const struct inode *inode)
 {
 	return atomic_read(&inode->i_count);
diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h
index 383050e7fdf5..00744ae5be18 100644
--- a/include/linux/fs/super_types.h
+++ b/include/linux/fs/super_types.h
@@ -129,6 +129,12 @@ struct super_operations {
 	void (*report_error)(const struct fserror_event *event);
 };
 
+struct inode_deferred_reclaim {
+	struct list_head	list;	/* inodes awaiting eviction, linked via i_lru */
+	struct work_struct	work;	/* runs inode_reclaim_deferred() */
+	spinlock_t		lock;	/* held around all accesses to @list */
+};
+
 struct super_block {
 	struct list_head			s_list;		/* Keep this first */
 	dev_t					s_dev;		/* search index; _not_ kdev_t */
@@ -254,6 +260,7 @@ struct super_block {
 	 */
 	struct list_lru				s_dentry_lru;
 	struct list_lru				s_inode_lru;
+	struct inode_deferred_reclaim		*s_inode_reclaim;
 	struct rcu_head				rcu;
 	struct work_struct			destroy_work;
 
-- 
2.51.0


  parent reply	other threads:[~2026-04-29 18:01 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-29 18:00 [PATCH RFC 0/4] fs: Deferred inode reclaim Jan Kara
2026-04-29 18:00 ` [PATCH 1/4] fs: Avoid inode dirtying on last iput Jan Kara
2026-04-29 18:00 ` Jan Kara [this message]
2026-04-29 18:00 ` [PATCH 3/4] fs: Add throttling to deferred inode reclaim Jan Kara
2026-04-29 18:00 ` [PATCH 4/4] ext4: Defer inode reclaim if it has preallocations Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260429180056.29598-6-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox