public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mateusz Guzik <mjguzik@gmail.com>
To: brauner@kernel.org
Cc: viro@zeniv.linux.org.uk, jack@suse.cz,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Mateusz Guzik <mjguzik@gmail.com>
Subject: [PATCH v3 7/7] fs: locklessly bump refs in the inode hash when possible
Date: Sun, 29 Mar 2026 19:20:02 +0200	[thread overview]
Message-ID: <20260329172002.3557801-8-mjguzik@gmail.com> (raw)
In-Reply-To: <20260329172002.3557801-1-mjguzik@gmail.com>

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 fs/dcache.c |  4 ++++
 fs/inode.c  | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/fs/dcache.c b/fs/dcache.c
index 9ceab142896f..b63450ebb85c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2033,6 +2033,10 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 	__d_instantiate(entry, inode);
 	spin_unlock(&entry->d_lock);
 	WARN_ON(!(inode_state_read(inode) & I_NEW));
+	/*
+	 * Paired with igrab_try_lockless()
+	 */
+	smp_wmb();
 	inode_state_clear(inode, I_NEW | I_CREATING);
 	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/inode.c b/fs/inode.c
index c7585924d5c8..c6e53ec90057 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1029,6 +1029,7 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
 }
 
 static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked);
+static bool igrab_try_lockless(struct inode *inode);
 
 /*
  * Called with the inode lock held.
@@ -1053,6 +1054,11 @@ static struct inode *find_inode(struct super_block *sb,
 			continue;
 		if (!test(inode, data))
 			continue;
+		if (igrab_try_lockless(inode)) {
+			rcu_read_unlock();
+			*isnew = false;
+			return inode;
+		}
 		spin_lock(&inode->i_lock);
 		if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode, hash_locked, true);
@@ -1095,6 +1101,11 @@ static struct inode *find_inode_fast(struct super_block *sb,
 			continue;
 		if (inode->i_sb != sb)
 			continue;
+		if (igrab_try_lockless(inode)) {
+			rcu_read_unlock();
+			*isnew = false;
+			return inode;
+		}
 		spin_lock(&inode->i_lock);
 		if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode, hash_locked, true);
@@ -1212,6 +1223,10 @@ void unlock_new_inode(struct inode *inode)
 	lockdep_annotate_inode_mutex_key(inode);
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode_state_read(inode) & I_NEW));
+	/*
+	 * Paired with igrab_try_lockless()
+	 */
+	smp_wmb();
 	inode_state_clear(inode, I_NEW | I_CREATING);
 	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
@@ -1223,6 +1238,10 @@ void discard_new_inode(struct inode *inode)
 	lockdep_annotate_inode_mutex_key(inode);
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode_state_read(inode) & I_NEW));
+	/*
+	 * Paired with igrab_try_lockless()
+	 */
+	smp_wmb();
 	inode_state_clear(inode, I_NEW);
 	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
@@ -1604,6 +1623,39 @@ struct inode *igrab(struct inode *inode)
 }
 EXPORT_SYMBOL(igrab);
 
+/*
+ * Special routine for the inode hash. Don't use elsewhere.
+ *
+ * It provides lockless refcount acquire in the common case of no problematic
+ * flags being set.
+ *
+ * Any of I_NEW, I_CREATING, I_FREEING and I_WILL_FREE require dedicated treatment
+ * during lookup and bumping inodes with these is intentionally avoided. Additionally
+ * it is illegal to add refs if eiter I_FREEING or I_WILL_FREE is set in the first place.
+ *
+ * Correctness is achieved as follows:
+ * 1. both I_NEW and I_CREATING can only legally get set *before* the inode is visible
+ *    in the hash, meaning the upfront read takes care of them.
+ * 2. unsetting of I_NEW is preceded with a store fence, paired with full fence in
+ *    atomic_add_unless
+ * 3. both I_FREEING and I_WILL_FREE can only legally get set if ->i_count == 0, thus if
+ *    cmpxchg managed to replace any non-0 value, we have an invariant these flags are
+ *    not present
+ */
+static bool igrab_try_lockless(struct inode *inode)
+{
+	if (inode_state_read_once(inode) & (I_NEW | I_CREATING | I_FREEING | I_WILL_FREE))
+		return false;
+	/*
+	 * Paired with routines clearing I_NEW
+	 */
+	if (atomic_add_unless(&inode->i_count, 1, 0)) {
+		VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_WILL_FREE), inode);
+		return true;
+	}
+	return false;
+}
+
 /**
  * ilookup5_nowait - search for an inode in the inode cache
  * @sb:		super block of file system to search
-- 
2.48.1


  parent reply	other threads:[~2026-03-29 17:20 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-29 17:19 [PATCH v3 0/7] assorted ->i_count changes + extension of lockless handling Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 1/7] fs: add icount_read_once() Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 2/7] Use icount_read() and icount_read_once() as appropriate Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 3/7] fs: enforce locking in icount_read(), add some commentary Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 4/7] fs: relocate and tidy up ihold() Mateusz Guzik
2026-03-29 17:20 ` [PATCH v3 5/7] fs: handle hypothetical filesystems which use I_DONTCACHE and drop the lock in ->drop_inode Mateusz Guzik
2026-03-31 11:23   ` Christian Brauner
2026-03-31 11:37     ` Mateusz Guzik
2026-03-29 17:20 ` [PATCH v3 6/7] fs: locklessly bump refs in igrab as long as it does not transition 0->1 Mateusz Guzik
2026-03-29 17:20 ` Mateusz Guzik [this message]
2026-03-30  7:38 ` [syzbot ci] Re: assorted ->i_count changes + extension of lockless handling syzbot ci
2026-03-31 11:31 ` [PATCH v3 0/7] " Christian Brauner
2026-03-31 11:36   ` Mateusz Guzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260329172002.3557801-8-mjguzik@gmail.com \
    --to=mjguzik@gmail.com \
    --cc=brauner@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox