linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: linux-fsdevel@vger.kernel.org, linux-btrfs@vger.kernel.org,
	kernel-team@fb.com, linux-ext4@vger.kernel.org,
	linux-xfs@vger.kernel.org, brauner@kernel.org,
	viro@ZenIV.linux.org.uk, amir73il@gmail.com
Subject: [PATCH v2 19/54] fs: hold a full ref while the inode is on a LRU
Date: Tue, 26 Aug 2025 11:39:19 -0400	[thread overview]
Message-ID: <7ea665f486c7fba75d44b9d01c5a0151a0ecae73.1756222465.git.josef@toxicpanda.com> (raw)
In-Reply-To: <cover.1756222464.git.josef@toxicpanda.com>

We want to eliminate 0 refcount inodes that can be used. To that end,
make the LRUs hold a full reference on the inode while it is on an LRU
list. From there we can change the eviction code to always just iput the
inode, and the LRU operations will just add or drop a full reference
where appropriate.

We also now must take into account unlink, and avoid adding the inode to
the LRU if it has an nlink of 0.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/inode.c | 87 +++++++++++++++++++++++++-----------------------------
 1 file changed, 40 insertions(+), 47 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index ede9118bb649..9001f809add0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -555,11 +555,13 @@ static void inode_add_cached_lru(struct inode *inode)
 
 	if (inode->i_state & I_CACHED_LRU)
 		return;
+	if (inode->__i_nlink == 0)
+		return;
 	if (!list_empty(&inode->i_lru))
 		return;
 
 	inode->i_state |= I_CACHED_LRU;
-	iobj_get(inode);
+	__iget(inode);
 	spin_lock(&inode->i_sb->s_cached_inodes_lock);
 	list_add(&inode->i_lru, &inode->i_sb->s_cached_inodes);
 	spin_unlock(&inode->i_sb->s_cached_inodes_lock);
@@ -582,7 +584,7 @@ static bool __inode_del_cached_lru(struct inode *inode)
 static bool inode_del_cached_lru(struct inode *inode)
 {
 	if (__inode_del_cached_lru(inode)) {
-		iobj_put(inode);
+		iput(inode);
 		return true;
 	}
 	return false;
@@ -598,6 +600,8 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
 		return;
 	if (icount_read(inode))
 		return;
+	if (inode->__i_nlink == 0)
+		return;
 	if (!(inode->i_sb->s_flags & SB_ACTIVE))
 		return;
 	if (inode_needs_cached(inode)) {
@@ -609,7 +613,7 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
 	if (list_lru_add_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) {
 		inode->i_state |= I_LRU;
 		if (need_ref)
-			iobj_get(inode);
+			__iget(inode);
 		this_cpu_inc(nr_unused);
 	} else if (rotate) {
 		inode->i_state |= I_REFERENCED;
@@ -655,7 +659,7 @@ void inode_lru_list_del(struct inode *inode)
 
 	if (list_lru_del_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) {
 		inode->i_state &= ~I_LRU;
-		iobj_put(inode);
+		iput(inode);
 		this_cpu_dec(nr_unused);
 	}
 }
@@ -926,6 +930,7 @@ static void evict(struct inode *inode)
 	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 }
 
+static void iput_evict(struct inode *inode);
 /*
  * dispose_list - dispose of the contents of a local list
  * @head: the head of the list to free
@@ -933,20 +938,14 @@ static void evict(struct inode *inode)
  * Dispose-list gets a local list with local inodes in it, so it doesn't
  * need to worry about list corruption and SMP locks.
  */
-static void dispose_list(struct list_head *head, bool for_lru)
+static void dispose_list(struct list_head *head)
 {
 	while (!list_empty(head)) {
 		struct inode *inode;
 
 		inode = list_first_entry(head, struct inode, i_lru);
 		list_del_init(&inode->i_lru);
-
-		if (for_lru) {
-			evict(inode);
-			iobj_put(inode);
-		} else {
-			iput(inode);
-		}
+		iput_evict(inode);
 		cond_resched();
 	}
 }
@@ -987,13 +986,13 @@ void evict_inodes(struct super_block *sb)
 		if (need_resched()) {
 			spin_unlock(&sb->s_inode_list_lock);
 			cond_resched();
-			dispose_list(&dispose, false);
+			dispose_list(&dispose);
 			goto again;
 		}
 	}
 	spin_unlock(&sb->s_inode_list_lock);
 
-	dispose_list(&dispose, false);
+	dispose_list(&dispose);
 }
 EXPORT_SYMBOL_GPL(evict_inodes);
 
@@ -1031,22 +1030,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 	if (inode_needs_cached(inode)) {
 		list_lru_isolate(lru, &inode->i_lru);
 		inode_add_cached_lru(inode);
-		iobj_put(inode);
-		spin_unlock(&inode->i_lock);
-		this_cpu_dec(nr_unused);
-		return LRU_REMOVED;
-	}
-
-	/*
-	 * Inodes can get referenced, redirtied, or repopulated while
-	 * they're already on the LRU, and this can make them
-	 * unreclaimable for a while. Remove them lazily here; iput,
-	 * sync, or the last page cache deletion will requeue them.
-	 */
-	if (icount_read(inode) ||
-	    (inode->i_state & ~I_REFERENCED)) {
-		list_lru_isolate(lru, &inode->i_lru);
-		inode->i_state &= ~I_LRU;
+		iput(inode);
 		spin_unlock(&inode->i_lock);
 		this_cpu_dec(nr_unused);
 		return LRU_REMOVED;
@@ -1082,7 +1066,6 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 	}
 
 	WARN_ON(inode->i_state & I_NEW);
-	inode->i_state |= I_FREEING;
 	inode->i_state &= ~I_LRU;
 	list_lru_isolate_move(lru, &inode->i_lru, freeable);
 	spin_unlock(&inode->i_lock);
@@ -1104,7 +1087,7 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
 
 	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
 				     inode_lru_isolate, &freeable);
-	dispose_list(&freeable, true);
+	dispose_list(&freeable);
 	return freed;
 }
 
@@ -1967,7 +1950,7 @@ EXPORT_SYMBOL(generic_delete_inode);
  * in cache if fs is alive, sync and evict if fs is
  * shutting down.
  */
-static void iput_final(struct inode *inode)
+static void iput_final(struct inode *inode, bool skip_lru)
 {
 	struct super_block *sb = inode->i_sb;
 	const struct super_operations *op = inode->i_sb->s_op;
@@ -1981,7 +1964,7 @@ static void iput_final(struct inode *inode)
 	else
 		drop = generic_drop_inode(inode);
 
-	if (!drop &&
+	if (!drop && !skip_lru &&
 	    !(inode->i_state & I_DONTCACHE) &&
 	    (sb->s_flags & SB_ACTIVE)) {
 		__inode_add_lru(inode, true);
@@ -1989,6 +1972,8 @@ static void iput_final(struct inode *inode)
 		return;
 	}
 
+	WARN_ON(!list_empty(&inode->i_lru));
+
 	state = inode->i_state;
 	if (!drop) {
 		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
@@ -2003,23 +1988,12 @@ static void iput_final(struct inode *inode)
 	}
 
 	WRITE_ONCE(inode->i_state, state | I_FREEING);
-	if (!list_empty(&inode->i_lru))
-		inode_lru_list_del(inode);
 	spin_unlock(&inode->i_lock);
 
 	evict(inode);
 }
 
-/**
- *	iput	- put an inode
- *	@inode: inode to put
- *
- *	Puts an inode, dropping its usage count. If the inode use count hits
- *	zero, the inode is then freed and may also be destroyed.
- *
- *	Consequently, iput() can sleep.
- */
-void iput(struct inode *inode)
+static void __iput(struct inode *inode, bool skip_lru)
 {
 	if (!inode)
 		return;
@@ -2038,12 +2012,31 @@ void iput(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	if (atomic_dec_and_test(&inode->i_count)) {
 		/* iput_final() drops i_lock */
-		iput_final(inode);
+		iput_final(inode, skip_lru);
 	} else {
 		spin_unlock(&inode->i_lock);
 	}
 	iobj_put(inode);
 }
+
+static void iput_evict(struct inode *inode)
+{
+	__iput(inode, true);
+}
+
+/**
+ *	iput	- put an inode
+ *	@inode: inode to put
+ *
+ *	Puts an inode, dropping its usage count. If the inode use count hits
+ *	zero, the inode is then freed and may also be destroyed.
+ *
+ *	Consequently, iput() can sleep.
+ */
+void iput(struct inode *inode)
+{
+	__iput(inode, false);
+}
 EXPORT_SYMBOL(iput);
 
 /**
-- 
2.49.0


  parent reply	other threads:[~2025-08-26 15:41 UTC|newest]

Thread overview: 105+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-26 15:39 [PATCH v2 00/54] fs: rework inode reference counting Josef Bacik
2025-08-26 15:39 ` [PATCH v2 01/54] fs: make the i_state flags an enum Josef Bacik
2025-08-26 15:39 ` [PATCH v2 02/54] fs: add an icount_read helper Josef Bacik
2025-08-26 22:18   ` Mateusz Guzik
2025-08-27 11:25   ` (subset) " Christian Brauner
2025-08-26 15:39 ` [PATCH v2 03/54] fs: rework iput logic Josef Bacik
2025-08-27 12:58   ` Mateusz Guzik
2025-08-27 14:18     ` Mateusz Guzik
2025-08-27 14:54       ` Josef Bacik
2025-08-27 14:57       ` Christian Brauner
2025-08-27 16:24         ` [PATCH] fs: revamp iput() Mateusz Guzik
2025-08-30 15:54           ` Mateusz Guzik
2025-09-01  8:50             ` Jan Kara
2025-09-01 10:39               ` Christian Brauner
2025-09-01 10:41             ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 04/54] fs: add an i_obj_count refcount to the inode Josef Bacik
2025-08-26 15:39 ` [PATCH v2 05/54] fs: hold an i_obj_count reference in wait_sb_inodes Josef Bacik
2025-08-26 15:39 ` [PATCH v2 06/54] fs: hold an i_obj_count reference for the i_wb_list Josef Bacik
2025-08-26 15:39 ` [PATCH v2 07/54] fs: hold an i_obj_count reference for the i_io_list Josef Bacik
2025-08-26 15:39 ` [PATCH v2 08/54] fs: hold an i_obj_count reference in writeback_sb_inodes Josef Bacik
2025-08-26 15:39 ` [PATCH v2 09/54] fs: hold an i_obj_count reference while on the hashtable Josef Bacik
2025-08-26 15:39 ` [PATCH v2 10/54] fs: hold an i_obj_count reference while on the LRU list Josef Bacik
2025-08-26 15:39 ` [PATCH v2 11/54] fs: hold an i_obj_count reference while on the sb inode list Josef Bacik
2025-08-26 15:39 ` [PATCH v2 12/54] fs: stop accessing ->i_count directly in f2fs and gfs2 Josef Bacik
2025-08-26 15:39 ` [PATCH v2 13/54] fs: hold an i_obj_count when we have an i_count reference Josef Bacik
2025-08-26 15:39 ` [PATCH v2 14/54] fs: add an I_LRU flag to the inode Josef Bacik
2025-08-26 15:39 ` [PATCH v2 15/54] fs: maintain a list of pinned inodes Josef Bacik
2025-08-27 15:20   ` Christian Brauner
2025-08-27 16:07     ` Josef Bacik
2025-08-28  8:24       ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 16/54] fs: delete the inode from the LRU list on lookup Josef Bacik
2025-08-27 21:46   ` Dave Chinner
2025-08-28 11:42     ` Josef Bacik
2025-09-02  4:07       ` Dave Chinner
2025-08-26 15:39 ` [PATCH v2 17/54] fs: remove the inode from the LRU list on unlink/rmdir Josef Bacik
2025-08-27 12:32   ` Christian Brauner
2025-08-27 16:08     ` Josef Bacik
2025-08-27 22:01     ` Dave Chinner
2025-08-28 11:46       ` Josef Bacik
2025-09-02  1:48         ` Dave Chinner
2025-08-28  9:00   ` Christian Brauner
2025-08-28  9:06   ` Christian Brauner
2025-08-28 10:43     ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 18/54] fs: change evict_inodes to use iput instead of evict directly Josef Bacik
2025-08-28 10:18   ` Christian Brauner
2025-08-26 15:39 ` Josef Bacik [this message]
2025-08-28 10:51   ` [PATCH v2 19/54] fs: hold a full ref while the inode is on a LRU Christian Brauner
2025-08-26 15:39 ` [PATCH v2 20/54] fs: disallow 0 reference count inodes Josef Bacik
2025-08-28 11:02   ` Christian Brauner
2025-08-28 11:44     ` Josef Bacik
2025-08-26 15:39 ` [PATCH v2 21/54] fs: make evict_inodes add to the dispose list under the i_lock Josef Bacik
2025-08-26 15:39 ` [PATCH v2 22/54] fs: convert i_count to refcount_t Josef Bacik
2025-08-28 12:00   ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 23/54] fs: use refcount_inc_not_zero in igrab Josef Bacik
2025-08-28 22:08   ` Eric Biggers
2025-08-29 13:42     ` Josef Bacik
2025-08-26 15:39 ` [PATCH v2 24/54] fs: use inode_tryget in find_inode* Josef Bacik
2025-08-26 15:39 ` [PATCH v2 25/54] fs: update find_inode_*rcu to check the i_count count Josef Bacik
2025-08-26 15:39 ` [PATCH v2 26/54] fs: use igrab in insert_inode_locked Josef Bacik
2025-08-28 12:15   ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 27/54] fs: remove I_WILL_FREE|I_FREEING check from __inode_add_lru Josef Bacik
2025-08-26 15:39 ` [PATCH v2 28/54] fs: remove I_WILL_FREE|I_FREEING check in inode_pin_lru_isolating Josef Bacik
2025-08-26 15:39 ` [PATCH v2 29/54] fs: use inode_tryget in evict_inodes Josef Bacik
2025-08-26 15:39 ` [PATCH v2 30/54] fs: change evict_dentries_for_decrypted_inodes to use refcount Josef Bacik
2025-08-28 12:25   ` Christian Brauner
2025-08-28 22:26     ` Eric Biggers
2025-08-29  7:38       ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 31/54] block: use igrab in sync_bdevs Josef Bacik
2025-08-26 15:39 ` [PATCH v2 32/54] bcachefs: use the refcount instead of I_WILL_FREE|I_FREEING Josef Bacik
2025-08-26 15:39 ` [PATCH v2 33/54] btrfs: don't check I_WILL_FREE|I_FREEING Josef Bacik
2025-08-26 15:39 ` [PATCH v2 34/54] fs: use igrab in drop_pagecache_sb Josef Bacik
2025-08-26 15:39 ` [PATCH v2 35/54] fs: stop checking I_FREEING in d_find_alias_rcu Josef Bacik
2025-08-26 15:39 ` [PATCH v2 36/54] ext4: stop checking I_WILL_FREE|IFREEING in ext4_check_map_extents_env Josef Bacik
2025-08-26 15:39 ` [PATCH v2 37/54] fs: remove I_WILL_FREE|I_FREEING from fs-writeback.c Josef Bacik
2025-08-26 15:39 ` [PATCH v2 38/54] gfs2: remove I_WILL_FREE|I_FREEING usage Josef Bacik
2025-08-26 15:39 ` [PATCH v2 39/54] fs: remove I_WILL_FREE|I_FREEING check from dquot.c Josef Bacik
2025-08-28 12:35   ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 40/54] notify: remove I_WILL_FREE|I_FREEING checks in fsnotify_unmount_inodes Josef Bacik
2025-08-26 15:39 ` [PATCH v2 41/54] xfs: remove I_FREEING check Josef Bacik
2025-08-26 15:39 ` [PATCH v2 42/54] landlock: remove I_FREEING|I_WILL_FREE check Josef Bacik
2025-08-26 15:39 ` [PATCH v2 43/54] fs: change inode_is_dirtytime_only to use refcount Josef Bacik
2025-08-26 22:06   ` Mateusz Guzik
2025-08-28 12:38     ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 44/54] btrfs: remove references to I_FREEING Josef Bacik
2025-08-26 15:39 ` [PATCH v2 45/54] ext4: remove reference to I_FREEING in inode.c Josef Bacik
2025-08-26 15:39 ` [PATCH v2 46/54] ext4: remove reference to I_FREEING in orphan.c Josef Bacik
2025-08-26 15:39 ` [PATCH v2 47/54] pnfs: use i_count refcount to determine if the inode is going away Josef Bacik
2025-08-26 15:39 ` [PATCH v2 48/54] fs: remove some spurious I_FREEING references in inode.c Josef Bacik
2025-08-28 12:40   ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 49/54] xfs: remove reference to I_FREEING|I_WILL_FREE Josef Bacik
2025-08-26 15:39 ` [PATCH v2 50/54] ocfs2: do not set I_WILL_FREE Josef Bacik
2025-08-26 15:39 ` [PATCH v2 51/54] fs: remove I_FREEING|I_WILL_FREE Josef Bacik
2025-08-28 12:42   ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 52/54] fs: remove I_REFERENCED Josef Bacik
2025-08-28 12:47   ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 53/54] fs: remove I_LRU_ISOLATING flag Josef Bacik
2025-08-28  0:26   ` Dave Chinner
2025-08-28 10:35     ` Christian Brauner
2025-08-26 15:39 ` [PATCH v2 54/54] fs: add documentation explaining the reference count rules for inodes Josef Bacik
2025-08-27  8:03 ` [syzbot ci] Re: fs: rework inode reference counting syzbot ci
2025-08-27 11:14 ` (subset) [PATCH v2 00/54] " Christian Brauner
2025-08-28 12:51 ` Christian Brauner
2025-08-28 21:22   ` Josef Bacik
2025-09-02 10:06 ` Mateusz Guzik
2025-09-02 21:16   ` Josef Bacik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7ea665f486c7fba75d44b9d01c5a0151a0ecae73.1756222465.git.josef@toxicpanda.com \
    --to=josef@toxicpanda.com \
    --cc=amir73il@gmail.com \
    --cc=brauner@kernel.org \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).