From: Mateusz Guzik <mjguzik@gmail.com>
To: brauner@kernel.org
Cc: viro@zeniv.linux.org.uk, jack@suse.cz,
linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
Mateusz Guzik <mjguzik@gmail.com>
Subject: [PATCH v3 7/7] fs: locklessly bump refs in the inode hash when possible
Date: Sun, 29 Mar 2026 19:20:02 +0200 [thread overview]
Message-ID: <20260329172002.3557801-8-mjguzik@gmail.com> (raw)
In-Reply-To: <20260329172002.3557801-1-mjguzik@gmail.com>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
fs/dcache.c | 4 ++++
fs/inode.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/fs/dcache.c b/fs/dcache.c
index 9ceab142896f..b63450ebb85c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2033,6 +2033,10 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
__d_instantiate(entry, inode);
spin_unlock(&entry->d_lock);
WARN_ON(!(inode_state_read(inode) & I_NEW));
+ /*
+ * Paired with igrab_try_lockless()
+ */
+ smp_wmb();
inode_state_clear(inode, I_NEW | I_CREATING);
inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
diff --git a/fs/inode.c b/fs/inode.c
index c7585924d5c8..c6e53ec90057 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1029,6 +1029,7 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
}
static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked);
+static bool igrab_try_lockless(struct inode *inode);
/*
* Called with the inode lock held.
@@ -1053,6 +1054,11 @@ static struct inode *find_inode(struct super_block *sb,
continue;
if (!test(inode, data))
continue;
+ if (igrab_try_lockless(inode)) {
+ rcu_read_unlock();
+ *isnew = false;
+ return inode;
+ }
spin_lock(&inode->i_lock);
if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
__wait_on_freeing_inode(inode, hash_locked, true);
@@ -1095,6 +1101,11 @@ static struct inode *find_inode_fast(struct super_block *sb,
continue;
if (inode->i_sb != sb)
continue;
+ if (igrab_try_lockless(inode)) {
+ rcu_read_unlock();
+ *isnew = false;
+ return inode;
+ }
spin_lock(&inode->i_lock);
if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
__wait_on_freeing_inode(inode, hash_locked, true);
@@ -1212,6 +1223,10 @@ void unlock_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode_state_read(inode) & I_NEW));
+ /*
+ * Paired with igrab_try_lockless()
+ */
+ smp_wmb();
inode_state_clear(inode, I_NEW | I_CREATING);
inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
@@ -1223,6 +1238,10 @@ void discard_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode_state_read(inode) & I_NEW));
+ /*
+ * Paired with igrab_try_lockless()
+ */
+ smp_wmb();
inode_state_clear(inode, I_NEW);
inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
@@ -1604,6 +1623,39 @@ struct inode *igrab(struct inode *inode)
}
EXPORT_SYMBOL(igrab);
+/*
+ * Special routine for the inode hash. Don't use elsewhere.
+ *
+ * It provides lockless refcount acquire in the common case of no problematic
+ * flags being set.
+ *
+ * Any of I_NEW, I_CREATING, I_FREEING and I_WILL_FREE require dedicated treatment
+ * during lookup and bumping inodes with these is intentionally avoided. Additionally
+ * it is illegal to add refs if either I_FREEING or I_WILL_FREE is set in the first place.
+ *
+ * Correctness is achieved as follows:
+ * 1. both I_NEW and I_CREATING can only legally get set *before* the inode is visible
+ * in the hash, meaning the upfront read takes care of them.
+ * 2. unsetting of I_NEW is preceded with a store fence, paired with full fence in
+ * atomic_add_unless
+ * 3. both I_FREEING and I_WILL_FREE can only legally get set if ->i_count == 0, thus if
+ * atomic_add_unless managed to bump any non-0 value, we have an invariant that these
+ * flags are not present
+ */
+static bool igrab_try_lockless(struct inode *inode)
+{
+ if (inode_state_read_once(inode) & (I_NEW | I_CREATING | I_FREEING | I_WILL_FREE))
+ return false;
+ /*
+ * The full fence in atomic_add_unless() pairs with the smp_wmb() preceding I_NEW clearing
+ */
+ if (atomic_add_unless(&inode->i_count, 1, 0)) {
+ VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_WILL_FREE), inode);
+ return true;
+ }
+ return false;
+}
+
/**
* ilookup5_nowait - search for an inode in the inode cache
* @sb: super block of file system to search
--
2.48.1
next prev parent reply other threads:[~2026-03-29 17:20 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-29 17:19 [PATCH v3 0/7] assorted ->i_count changes + extension of lockless handling Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 1/7] fs: add icount_read_once() Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 2/7] Use icount_read() and icount_read_once() as appropriate Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 3/7] fs: enforce locking in icount_read(), add some commentary Mateusz Guzik
2026-03-29 17:19 ` [PATCH v3 4/7] fs: relocate and tidy up ihold() Mateusz Guzik
2026-03-29 17:20 ` [PATCH v3 5/7] fs: handle hypothetical filesystems which use I_DONTCACHE and drop the lock in ->drop_inode Mateusz Guzik
2026-03-31 11:23 ` Christian Brauner
2026-03-31 11:37 ` Mateusz Guzik
2026-03-29 17:20 ` [PATCH v3 6/7] fs: locklessly bump refs in igrab as long as it does not transition 0->1 Mateusz Guzik
2026-03-29 17:20 ` Mateusz Guzik [this message]
2026-03-30 7:38 ` [syzbot ci] Re: assorted ->i_count changes + extension of lockless handling syzbot ci
2026-03-31 11:31 ` [PATCH v3 0/7] " Christian Brauner
2026-03-31 11:36 ` Mateusz Guzik
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260329172002.3557801-8-mjguzik@gmail.com \
--to=mjguzik@gmail.com \
--cc=brauner@kernel.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox