* [PATCH] fs: rework I_NEW handling to operate without fences
@ 2025-10-10 22:17 Mateusz Guzik
2025-10-15 11:50 ` Mateusz Guzik
` (3 more replies)
0 siblings, 4 replies; 11+ messages in thread
From: Mateusz Guzik @ 2025-10-10 22:17 UTC (permalink / raw)
To: brauner; +Cc: viro, jack, linux-kernel, linux-fsdevel, Mateusz Guzik
In the inode hash code, grab the inode state while ->i_lock is held. If
the I_NEW flag is found to be set, synchronize the sleep once more with the lock held.
In the real world the flag is not set most of the time.
Apart from being simpler to reason about, it comes with a minor speed-up,
as clearing the flag no longer requires the smp_mb() fence.
While here rename wait_on_inode() to wait_on_new_inode() to line it up
with __wait_on_freeing_inode().
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
This temporarily duplicates the sleep code from inode_wait_for_lru_isolating().
This duplication is going to be removed later.
There is high repetition of:
if (unlikely(isnew)) {
wait_on_new_inode(old);
if (unlikely(inode_unhashed(old))) {
iput(old);
goto again;
}
I expect this is going to go away after I post a patch to sanitize the
current APIs for the hash.
fs/afs/dir.c | 4 +-
fs/dcache.c | 10 ----
fs/gfs2/glock.c | 2 +-
fs/inode.c | 146 +++++++++++++++++++++++++++------------------
include/linux/fs.h | 12 +---
5 files changed, 93 insertions(+), 81 deletions(-)
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 89d36e3e5c79..f4e9e12373ac 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct inode *inode = NULL, *ti;
afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
- bool supports_ibulk;
+ bool supports_ibulk, isnew;
long ret;
int i;
@@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
* callback counters.
*/
ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
- afs_ilookup5_test_by_fid, &vp->fid);
+ afs_ilookup5_test_by_fid, &vp->fid, &isnew);
if (!IS_ERR_OR_NULL(ti)) {
vnode = AFS_FS_I(ti);
vp->dv_before = vnode->status.data_version;
diff --git a/fs/dcache.c b/fs/dcache.c
index 78ffa7b7e824..25131f105a60 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
WARN_ON(!(inode_state_read(inode) & I_NEW));
- /*
- * Pairs with smp_rmb in wait_on_inode().
- */
- smp_wmb();
inode_state_clear(inode, I_NEW | I_CREATING);
- /*
- * Pairs with the barrier in prepare_to_wait_event() to make sure
- * ___wait_var_event() either sees the bit cleared or
- * waitqueue_active() check in wake_up_var() sees the waiter.
- */
- smp_mb();
inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b677c0e6b9ab..c9712235e7a0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
ip = NULL;
spin_unlock(&gl->gl_lockref.lock);
if (ip) {
- wait_on_inode(&ip->i_inode);
+ wait_on_new_inode(&ip->i_inode);
if (is_bad_inode(&ip->i_inode)) {
iput(&ip->i_inode);
ip = NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 3153d725859c..1396f79b2551 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
}
EXPORT_SYMBOL(inode_bit_waitqueue);
+void wait_on_new_inode(struct inode *inode)
+{
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ spin_lock(&inode->i_lock);
+ if (!(inode_state_read(inode) & I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ return;
+ }
+
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ if (!(inode_state_read(inode) & I_NEW))
+ break;
+ spin_unlock(&inode->i_lock);
+ schedule();
+ spin_lock(&inode->i_lock);
+ }
+ finish_wait(wq_head, &wqe.wq_entry);
+ WARN_ON(inode_state_read(inode) & I_NEW);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(wait_on_new_inode);
+
/*
* Add inode to LRU if needed (inode is unused and clean).
*
@@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
int (*test)(struct inode *, void *),
- void *data, bool is_inode_hash_locked)
+ void *data, bool is_inode_hash_locked,
+ bool *isnew)
{
struct inode *inode = NULL;
@@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
return ERR_PTR(-ESTALE);
}
__iget(inode);
+ *isnew = !!(inode_state_read(inode) & I_NEW);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
return inode;
@@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
*/
static struct inode *find_inode_fast(struct super_block *sb,
struct hlist_head *head, unsigned long ino,
- bool is_inode_hash_locked)
+ bool is_inode_hash_locked, bool *isnew)
{
struct inode *inode = NULL;
@@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
return ERR_PTR(-ESTALE);
}
__iget(inode);
+ *isnew = !!(inode_state_read(inode) & I_NEW);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
return inode;
@@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode_state_read(inode) & I_NEW));
- /*
- * Pairs with smp_rmb in wait_on_inode().
- */
- smp_wmb();
inode_state_clear(inode, I_NEW | I_CREATING);
- /*
- * Pairs with the barrier in prepare_to_wait_event() to make sure
- * ___wait_var_event() either sees the bit cleared or
- * waitqueue_active() check in wake_up_var() sees the waiter.
- */
- smp_mb();
inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
@@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode_state_read(inode) & I_NEW));
- /*
- * Pairs with smp_rmb in wait_on_inode().
- */
- smp_wmb();
inode_state_clear(inode, I_NEW);
- /*
- * Pairs with the barrier in prepare_to_wait_event() to make sure
- * ___wait_var_event() either sees the bit cleared or
- * waitqueue_active() check in wake_up_var() sees the waiter.
- */
- smp_mb();
inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
iput(inode);
@@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
{
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
struct inode *old;
+ bool isnew;
might_sleep();
again:
spin_lock(&inode_hash_lock);
- old = find_inode(inode->i_sb, head, test, data, true);
+ old = find_inode(inode->i_sb, head, test, data, true, &isnew);
if (unlikely(old)) {
/*
* Uhhuh, somebody else created the same inode under us.
@@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
spin_unlock(&inode_hash_lock);
if (IS_ERR(old))
return NULL;
- wait_on_inode(old);
- if (unlikely(inode_unhashed(old))) {
- iput(old);
- goto again;
+ if (unlikely(isnew)) {
+ wait_on_new_inode(old);
+ if (unlikely(inode_unhashed(old))) {
+ iput(old);
+ goto again;
+ }
}
return old;
}
@@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
{
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
struct inode *inode, *new;
+ bool isnew;
might_sleep();
again:
- inode = find_inode(sb, head, test, data, false);
+ inode = find_inode(sb, head, test, data, false, &isnew);
if (inode) {
if (IS_ERR(inode))
return NULL;
- wait_on_inode(inode);
- if (unlikely(inode_unhashed(inode))) {
- iput(inode);
- goto again;
+ if (unlikely(isnew)) {
+ wait_on_new_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
return inode;
}
@@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
+ bool isnew;
might_sleep();
again:
- inode = find_inode_fast(sb, head, ino, false);
+ inode = find_inode_fast(sb, head, ino, false, &isnew);
if (inode) {
if (IS_ERR(inode))
return NULL;
- wait_on_inode(inode);
- if (unlikely(inode_unhashed(inode))) {
- iput(inode);
- goto again;
+ if (unlikely(isnew)) {
+ wait_on_new_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
return inode;
}
@@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
spin_lock(&inode_hash_lock);
/* We released the lock, so.. */
- old = find_inode_fast(sb, head, ino, true);
+ old = find_inode_fast(sb, head, ino, true, &isnew);
if (!old) {
inode->i_ino = ino;
spin_lock(&inode->i_lock);
@@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
if (IS_ERR(old))
return NULL;
inode = old;
- wait_on_inode(inode);
- if (unlikely(inode_unhashed(inode))) {
- iput(inode);
- goto again;
+ if (unlikely(isnew)) {
+ wait_on_new_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
}
return inode;
@@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
* Note2: @test is called with the inode_hash_lock held, so can't sleep.
*/
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
- int (*test)(struct inode *, void *), void *data)
+ int (*test)(struct inode *, void *), void *data, bool *isnew)
{
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
struct inode *inode;
spin_lock(&inode_hash_lock);
- inode = find_inode(sb, head, test, data, true);
+ inode = find_inode(sb, head, test, data, true, isnew);
spin_unlock(&inode_hash_lock);
return IS_ERR(inode) ? NULL : inode;
@@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
struct inode *inode;
+ bool isnew;
might_sleep();
again:
- inode = ilookup5_nowait(sb, hashval, test, data);
+ inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
if (inode) {
- wait_on_inode(inode);
- if (unlikely(inode_unhashed(inode))) {
- iput(inode);
- goto again;
+ if (unlikely(isnew)) {
+ wait_on_new_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
}
return inode;
@@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
+ bool isnew;
might_sleep();
again:
- inode = find_inode_fast(sb, head, ino, false);
+ inode = find_inode_fast(sb, head, ino, false, &isnew);
if (inode) {
if (IS_ERR(inode))
return NULL;
- wait_on_inode(inode);
- if (unlikely(inode_unhashed(inode))) {
- iput(inode);
- goto again;
+ if (unlikely(isnew)) {
+ wait_on_new_inode(inode);
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ goto again;
+ }
}
}
return inode;
@@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
struct super_block *sb = inode->i_sb;
ino_t ino = inode->i_ino;
struct hlist_head *head = inode_hashtable + hash(sb, ino);
+ bool isnew;
might_sleep();
@@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
return -EBUSY;
}
__iget(old);
+ isnew = !!(inode_state_read(old) & I_NEW);
spin_unlock(&old->i_lock);
spin_unlock(&inode_hash_lock);
- wait_on_inode(old);
- if (unlikely(!inode_unhashed(old))) {
- iput(old);
- return -EBUSY;
+ if (isnew) {
+ wait_on_new_inode(old);
+ if (unlikely(!inode_unhashed(old))) {
+ iput(old);
+ return -EBUSY;
+ }
}
iput(old);
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21c73df3ce75..a813abdcf218 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
hlist_add_fake(&inode->i_hash);
}
-static inline void wait_on_inode(struct inode *inode)
-{
- wait_var_event(inode_state_wait_address(inode, __I_NEW),
- !(inode_state_read_once(inode) & I_NEW));
- /*
- * Pairs with routines clearing I_NEW.
- */
- smp_rmb();
-}
+void wait_on_new_inode(struct inode *inode);
/*
* inode->i_rwsem nesting subclasses for the lock validator:
@@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
- void *data);
+ void *data, bool *isnew);
extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data);
extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
--
2.34.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-10-10 22:17 [PATCH] fs: rework I_NEW handling to operate without fences Mateusz Guzik
@ 2025-10-15 11:50 ` Mateusz Guzik
2025-10-21 12:48 ` Christian Brauner
2025-10-21 12:49 ` Christian Brauner
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: Mateusz Guzik @ 2025-10-15 11:50 UTC (permalink / raw)
To: brauner; +Cc: viro, jack, linux-kernel, linux-fsdevel
can i get some flames on this?
On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
>
> In the inode hash code grab the state while ->i_lock is held. If found
> to be set, synchronize the sleep once more with the lock held.
>
> In the real world the flag is not set most of the time.
>
> Apart from being simpler to reason about, it comes with a minor speed up
> as now clearing the flag does not require the smp_mb() fence.
>
> While here rename wait_on_inode() to wait_on_new_inode() to line it up
> with __wait_on_freeing_inode().
>
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
>
> This temporarily duplicated sleep code from inode_wait_for_lru_isolating().
> This is going to get dedupped later.
>
> There is high repetition of:
> if (unlikely(isnew)) {
> wait_on_new_inode(old);
> if (unlikely(inode_unhashed(old))) {
> iput(old);
> goto again;
> }
>
> I expect this is going to go away after I post a patch to sanitize the
> current APIs for the hash.
>
>
> fs/afs/dir.c | 4 +-
> fs/dcache.c | 10 ----
> fs/gfs2/glock.c | 2 +-
> fs/inode.c | 146 +++++++++++++++++++++++++++------------------
> include/linux/fs.h | 12 +---
> 5 files changed, 93 insertions(+), 81 deletions(-)
>
> diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> index 89d36e3e5c79..f4e9e12373ac 100644
> --- a/fs/afs/dir.c
> +++ b/fs/afs/dir.c
> @@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
> struct inode *inode = NULL, *ti;
> afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
> - bool supports_ibulk;
> + bool supports_ibulk, isnew;
> long ret;
> int i;
>
> @@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> * callback counters.
> */
> ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
> - afs_ilookup5_test_by_fid, &vp->fid);
> + afs_ilookup5_test_by_fid, &vp->fid, &isnew);
> if (!IS_ERR_OR_NULL(ti)) {
> vnode = AFS_FS_I(ti);
> vp->dv_before = vnode->status.data_version;
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 78ffa7b7e824..25131f105a60 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
> spin_lock(&inode->i_lock);
> __d_instantiate(entry, inode);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW | I_CREATING);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> }
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index b677c0e6b9ab..c9712235e7a0 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
> ip = NULL;
> spin_unlock(&gl->gl_lockref.lock);
> if (ip) {
> - wait_on_inode(&ip->i_inode);
> + wait_on_new_inode(&ip->i_inode);
> if (is_bad_inode(&ip->i_inode)) {
> iput(&ip->i_inode);
> ip = NULL;
> diff --git a/fs/inode.c b/fs/inode.c
> index 3153d725859c..1396f79b2551 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
> }
> EXPORT_SYMBOL(inode_bit_waitqueue);
>
> +void wait_on_new_inode(struct inode *inode)
> +{
> + struct wait_bit_queue_entry wqe;
> + struct wait_queue_head *wq_head;
> +
> + spin_lock(&inode->i_lock);
> + if (!(inode_state_read(inode) & I_NEW)) {
> + spin_unlock(&inode->i_lock);
> + return;
> + }
> +
> + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> + for (;;) {
> + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> + if (!(inode_state_read(inode) & I_NEW))
> + break;
> + spin_unlock(&inode->i_lock);
> + schedule();
> + spin_lock(&inode->i_lock);
> + }
> + finish_wait(wq_head, &wqe.wq_entry);
> + WARN_ON(inode_state_read(inode) & I_NEW);
> + spin_unlock(&inode->i_lock);
> +}
> +EXPORT_SYMBOL(wait_on_new_inode);
> +
> /*
> * Add inode to LRU if needed (inode is unused and clean).
> *
> @@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
> static struct inode *find_inode(struct super_block *sb,
> struct hlist_head *head,
> int (*test)(struct inode *, void *),
> - void *data, bool is_inode_hash_locked)
> + void *data, bool is_inode_hash_locked,
> + bool *isnew)
> {
> struct inode *inode = NULL;
>
> @@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
> return ERR_PTR(-ESTALE);
> }
> __iget(inode);
> + *isnew = !!(inode_state_read(inode) & I_NEW);
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> return inode;
> @@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
> */
> static struct inode *find_inode_fast(struct super_block *sb,
> struct hlist_head *head, unsigned long ino,
> - bool is_inode_hash_locked)
> + bool is_inode_hash_locked, bool *isnew)
> {
> struct inode *inode = NULL;
>
> @@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
> return ERR_PTR(-ESTALE);
> }
> __iget(inode);
> + *isnew = !!(inode_state_read(inode) & I_NEW);
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> return inode;
> @@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
> lockdep_annotate_inode_mutex_key(inode);
> spin_lock(&inode->i_lock);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW | I_CREATING);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> }
> @@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
> lockdep_annotate_inode_mutex_key(inode);
> spin_lock(&inode->i_lock);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> iput(inode);
> @@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> {
> struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
> struct inode *old;
> + bool isnew;
>
> might_sleep();
>
> again:
> spin_lock(&inode_hash_lock);
> - old = find_inode(inode->i_sb, head, test, data, true);
> + old = find_inode(inode->i_sb, head, test, data, true, &isnew);
> if (unlikely(old)) {
> /*
> * Uhhuh, somebody else created the same inode under us.
> @@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> spin_unlock(&inode_hash_lock);
> if (IS_ERR(old))
> return NULL;
> - wait_on_inode(old);
> - if (unlikely(inode_unhashed(old))) {
> - iput(old);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(old);
> + if (unlikely(inode_unhashed(old))) {
> + iput(old);
> + goto again;
> + }
> }
> return old;
> }
> @@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
> {
> struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> struct inode *inode, *new;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode(sb, head, test, data, false);
> + inode = find_inode(sb, head, test, data, false, &isnew);
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> return inode;
> }
> @@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode_fast(sb, head, ino, false);
> + inode = find_inode_fast(sb, head, ino, false, &isnew);
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> return inode;
> }
> @@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>
> spin_lock(&inode_hash_lock);
> /* We released the lock, so.. */
> - old = find_inode_fast(sb, head, ino, true);
> + old = find_inode_fast(sb, head, ino, true, &isnew);
> if (!old) {
> inode->i_ino = ino;
> spin_lock(&inode->i_lock);
> @@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> if (IS_ERR(old))
> return NULL;
> inode = old;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
> * Note2: @test is called with the inode_hash_lock held, so can't sleep.
> */
> struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
> - int (*test)(struct inode *, void *), void *data)
> + int (*test)(struct inode *, void *), void *data, bool *isnew)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> struct inode *inode;
>
> spin_lock(&inode_hash_lock);
> - inode = find_inode(sb, head, test, data, true);
> + inode = find_inode(sb, head, test, data, true, isnew);
> spin_unlock(&inode_hash_lock);
>
> return IS_ERR(inode) ? NULL : inode;
> @@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> int (*test)(struct inode *, void *), void *data)
> {
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = ilookup5_nowait(sb, hashval, test, data);
> + inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
> if (inode) {
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode_fast(sb, head, ino, false);
> + inode = find_inode_fast(sb, head, ino, false, &isnew);
>
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
> struct super_block *sb = inode->i_sb;
> ino_t ino = inode->i_ino;
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> + bool isnew;
>
> might_sleep();
>
> @@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
> return -EBUSY;
> }
> __iget(old);
> + isnew = !!(inode_state_read(old) & I_NEW);
> spin_unlock(&old->i_lock);
> spin_unlock(&inode_hash_lock);
> - wait_on_inode(old);
> - if (unlikely(!inode_unhashed(old))) {
> - iput(old);
> - return -EBUSY;
> + if (isnew) {
> + wait_on_new_inode(old);
> + if (unlikely(!inode_unhashed(old))) {
> + iput(old);
> + return -EBUSY;
> + }
> }
> iput(old);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 21c73df3ce75..a813abdcf218 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
> hlist_add_fake(&inode->i_hash);
> }
>
> -static inline void wait_on_inode(struct inode *inode)
> -{
> - wait_var_event(inode_state_wait_address(inode, __I_NEW),
> - !(inode_state_read_once(inode) & I_NEW));
> - /*
> - * Pairs with routines clearing I_NEW.
> - */
> - smp_rmb();
> -}
> +void wait_on_new_inode(struct inode *inode);
>
> /*
> * inode->i_rwsem nesting subclasses for the lock validator:
> @@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
>
> extern struct inode *ilookup5_nowait(struct super_block *sb,
> unsigned long hashval, int (*test)(struct inode *, void *),
> - void *data);
> + void *data, bool *isnew);
> extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> int (*test)(struct inode *, void *), void *data);
> extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-10-15 11:50 ` Mateusz Guzik
@ 2025-10-21 12:48 ` Christian Brauner
2025-10-21 12:54 ` Mateusz Guzik
0 siblings, 1 reply; 11+ messages in thread
From: Christian Brauner @ 2025-10-21 12:48 UTC (permalink / raw)
To: Mateusz Guzik; +Cc: viro, jack, linux-kernel, linux-fsdevel
On Wed, Oct 15, 2025 at 01:50:25PM +0200, Mateusz Guzik wrote:
> can i get some flames on this?
Ok, that looks fine to me. I don't particularly enjoy that boolean but I
think it simplifies d_instantiate_new() enough to make up for it.
>
> On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> >
> > In the inode hash code grab the state while ->i_lock is held. If found
> > to be set, synchronize the sleep once more with the lock held.
> >
> > In the real world the flag is not set most of the time.
> >
> > Apart from being simpler to reason about, it comes with a minor speed up
> > as now clearing the flag does not require the smp_mb() fence.
> >
> > While here rename wait_on_inode() to wait_on_new_inode() to line it up
> > with __wait_on_freeing_inode().
> >
> > Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> > ---
> >
> > This temporarily duplicated sleep code from inode_wait_for_lru_isolating().
> > This is going to get dedupped later.
> >
> > There is high repetition of:
> > if (unlikely(isnew)) {
> > wait_on_new_inode(old);
> > if (unlikely(inode_unhashed(old))) {
> > iput(old);
> > goto again;
> > }
> >
> > I expect this is going to go away after I post a patch to sanitize the
> > current APIs for the hash.
> >
> >
> > fs/afs/dir.c | 4 +-
> > fs/dcache.c | 10 ----
> > fs/gfs2/glock.c | 2 +-
> > fs/inode.c | 146 +++++++++++++++++++++++++++------------------
> > include/linux/fs.h | 12 +---
> > 5 files changed, 93 insertions(+), 81 deletions(-)
> >
> > diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> > index 89d36e3e5c79..f4e9e12373ac 100644
> > --- a/fs/afs/dir.c
> > +++ b/fs/afs/dir.c
> > @@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> > struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
> > struct inode *inode = NULL, *ti;
> > afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
> > - bool supports_ibulk;
> > + bool supports_ibulk, isnew;
> > long ret;
> > int i;
> >
> > @@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> > * callback counters.
> > */
> > ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
> > - afs_ilookup5_test_by_fid, &vp->fid);
> > + afs_ilookup5_test_by_fid, &vp->fid, &isnew);
> > if (!IS_ERR_OR_NULL(ti)) {
> > vnode = AFS_FS_I(ti);
> > vp->dv_before = vnode->status.data_version;
> > diff --git a/fs/dcache.c b/fs/dcache.c
> > index 78ffa7b7e824..25131f105a60 100644
> > --- a/fs/dcache.c
> > +++ b/fs/dcache.c
> > @@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
> > spin_lock(&inode->i_lock);
> > __d_instantiate(entry, inode);
> > WARN_ON(!(inode_state_read(inode) & I_NEW));
> > - /*
> > - * Pairs with smp_rmb in wait_on_inode().
> > - */
> > - smp_wmb();
> > inode_state_clear(inode, I_NEW | I_CREATING);
> > - /*
> > - * Pairs with the barrier in prepare_to_wait_event() to make sure
> > - * ___wait_var_event() either sees the bit cleared or
> > - * waitqueue_active() check in wake_up_var() sees the waiter.
> > - */
> > - smp_mb();
> > inode_wake_up_bit(inode, __I_NEW);
> > spin_unlock(&inode->i_lock);
> > }
> > diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> > index b677c0e6b9ab..c9712235e7a0 100644
> > --- a/fs/gfs2/glock.c
> > +++ b/fs/gfs2/glock.c
> > @@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
> > ip = NULL;
> > spin_unlock(&gl->gl_lockref.lock);
> > if (ip) {
> > - wait_on_inode(&ip->i_inode);
> > + wait_on_new_inode(&ip->i_inode);
> > if (is_bad_inode(&ip->i_inode)) {
> > iput(&ip->i_inode);
> > ip = NULL;
> > diff --git a/fs/inode.c b/fs/inode.c
> > index 3153d725859c..1396f79b2551 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
> > }
> > EXPORT_SYMBOL(inode_bit_waitqueue);
> >
> > +void wait_on_new_inode(struct inode *inode)
> > +{
> > + struct wait_bit_queue_entry wqe;
> > + struct wait_queue_head *wq_head;
> > +
> > + spin_lock(&inode->i_lock);
> > + if (!(inode_state_read(inode) & I_NEW)) {
> > + spin_unlock(&inode->i_lock);
> > + return;
> > + }
> > +
> > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> > + for (;;) {
> > + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> > + if (!(inode_state_read(inode) & I_NEW))
> > + break;
> > + spin_unlock(&inode->i_lock);
> > + schedule();
> > + spin_lock(&inode->i_lock);
> > + }
> > + finish_wait(wq_head, &wqe.wq_entry);
> > + WARN_ON(inode_state_read(inode) & I_NEW);
> > + spin_unlock(&inode->i_lock);
> > +}
> > +EXPORT_SYMBOL(wait_on_new_inode);
> > +
> > /*
> > * Add inode to LRU if needed (inode is unused and clean).
> > *
> > @@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
> > static struct inode *find_inode(struct super_block *sb,
> > struct hlist_head *head,
> > int (*test)(struct inode *, void *),
> > - void *data, bool is_inode_hash_locked)
> > + void *data, bool is_inode_hash_locked,
> > + bool *isnew)
> > {
> > struct inode *inode = NULL;
> >
> > @@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
> > return ERR_PTR(-ESTALE);
> > }
> > __iget(inode);
> > + *isnew = !!(inode_state_read(inode) & I_NEW);
> > spin_unlock(&inode->i_lock);
> > rcu_read_unlock();
> > return inode;
> > @@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
> > */
> > static struct inode *find_inode_fast(struct super_block *sb,
> > struct hlist_head *head, unsigned long ino,
> > - bool is_inode_hash_locked)
> > + bool is_inode_hash_locked, bool *isnew)
> > {
> > struct inode *inode = NULL;
> >
> > @@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
> > return ERR_PTR(-ESTALE);
> > }
> > __iget(inode);
> > + *isnew = !!(inode_state_read(inode) & I_NEW);
> > spin_unlock(&inode->i_lock);
> > rcu_read_unlock();
> > return inode;
> > @@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
> > lockdep_annotate_inode_mutex_key(inode);
> > spin_lock(&inode->i_lock);
> > WARN_ON(!(inode_state_read(inode) & I_NEW));
> > - /*
> > - * Pairs with smp_rmb in wait_on_inode().
> > - */
> > - smp_wmb();
> > inode_state_clear(inode, I_NEW | I_CREATING);
> > - /*
> > - * Pairs with the barrier in prepare_to_wait_event() to make sure
> > - * ___wait_var_event() either sees the bit cleared or
> > - * waitqueue_active() check in wake_up_var() sees the waiter.
> > - */
> > - smp_mb();
You're getting rid of smp_mb() because you're rechecking the flag under
i_lock after you called prepare_to_wait_event() in wait_on_new_inode()?
> > inode_wake_up_bit(inode, __I_NEW);
> > spin_unlock(&inode->i_lock);
> > }
> > @@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
> > lockdep_annotate_inode_mutex_key(inode);
> > spin_lock(&inode->i_lock);
> > WARN_ON(!(inode_state_read(inode) & I_NEW));
> > - /*
> > - * Pairs with smp_rmb in wait_on_inode().
> > - */
> > - smp_wmb();
> > inode_state_clear(inode, I_NEW);
> > - /*
> > - * Pairs with the barrier in prepare_to_wait_event() to make sure
> > - * ___wait_var_event() either sees the bit cleared or
> > - * waitqueue_active() check in wake_up_var() sees the waiter.
> > - */
> > - smp_mb();
> > inode_wake_up_bit(inode, __I_NEW);
> > spin_unlock(&inode->i_lock);
> > iput(inode);
> > @@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> > {
> > struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
> > struct inode *old;
> > + bool isnew;
> >
> > might_sleep();
> >
> > again:
> > spin_lock(&inode_hash_lock);
> > - old = find_inode(inode->i_sb, head, test, data, true);
> > + old = find_inode(inode->i_sb, head, test, data, true, &isnew);
> > if (unlikely(old)) {
> > /*
> > * Uhhuh, somebody else created the same inode under us.
> > @@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> > spin_unlock(&inode_hash_lock);
> > if (IS_ERR(old))
> > return NULL;
> > - wait_on_inode(old);
> > - if (unlikely(inode_unhashed(old))) {
> > - iput(old);
> > - goto again;
> > + if (unlikely(isnew)) {
> > + wait_on_new_inode(old);
> > + if (unlikely(inode_unhashed(old))) {
> > + iput(old);
> > + goto again;
> > + }
> > }
> > return old;
> > }
> > @@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
> > {
> > struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> > struct inode *inode, *new;
> > + bool isnew;
> >
> > might_sleep();
> >
> > again:
> > - inode = find_inode(sb, head, test, data, false);
> > + inode = find_inode(sb, head, test, data, false, &isnew);
> > if (inode) {
> > if (IS_ERR(inode))
> > return NULL;
> > - wait_on_inode(inode);
> > - if (unlikely(inode_unhashed(inode))) {
> > - iput(inode);
> > - goto again;
> > + if (unlikely(isnew)) {
> > + wait_on_new_inode(inode);
> > + if (unlikely(inode_unhashed(inode))) {
> > + iput(inode);
> > + goto again;
> > + }
> > }
> > return inode;
> > }
> > @@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> > {
> > struct hlist_head *head = inode_hashtable + hash(sb, ino);
> > struct inode *inode;
> > + bool isnew;
> >
> > might_sleep();
> >
> > again:
> > - inode = find_inode_fast(sb, head, ino, false);
> > + inode = find_inode_fast(sb, head, ino, false, &isnew);
> > if (inode) {
> > if (IS_ERR(inode))
> > return NULL;
> > - wait_on_inode(inode);
> > - if (unlikely(inode_unhashed(inode))) {
> > - iput(inode);
> > - goto again;
> > + if (unlikely(isnew)) {
> > + wait_on_new_inode(inode);
> > + if (unlikely(inode_unhashed(inode))) {
> > + iput(inode);
> > + goto again;
> > + }
> > }
> > return inode;
> > }
> > @@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> >
> > spin_lock(&inode_hash_lock);
> > /* We released the lock, so.. */
> > - old = find_inode_fast(sb, head, ino, true);
> > + old = find_inode_fast(sb, head, ino, true, &isnew);
> > if (!old) {
> > inode->i_ino = ino;
> > spin_lock(&inode->i_lock);
> > @@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> > if (IS_ERR(old))
> > return NULL;
> > inode = old;
> > - wait_on_inode(inode);
> > - if (unlikely(inode_unhashed(inode))) {
> > - iput(inode);
> > - goto again;
> > + if (unlikely(isnew)) {
> > + wait_on_new_inode(inode);
> > + if (unlikely(inode_unhashed(inode))) {
> > + iput(inode);
> > + goto again;
> > + }
> > }
> > }
> > return inode;
> > @@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
> > * Note2: @test is called with the inode_hash_lock held, so can't sleep.
> > */
> > struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
> > - int (*test)(struct inode *, void *), void *data)
> > + int (*test)(struct inode *, void *), void *data, bool *isnew)
> > {
> > struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> > struct inode *inode;
> >
> > spin_lock(&inode_hash_lock);
> > - inode = find_inode(sb, head, test, data, true);
> > + inode = find_inode(sb, head, test, data, true, isnew);
> > spin_unlock(&inode_hash_lock);
> >
> > return IS_ERR(inode) ? NULL : inode;
> > @@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> > int (*test)(struct inode *, void *), void *data)
> > {
> > struct inode *inode;
> > + bool isnew;
> >
> > might_sleep();
> >
> > again:
> > - inode = ilookup5_nowait(sb, hashval, test, data);
> > + inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
> > if (inode) {
> > - wait_on_inode(inode);
> > - if (unlikely(inode_unhashed(inode))) {
> > - iput(inode);
> > - goto again;
> > + if (unlikely(isnew)) {
> > + wait_on_new_inode(inode);
> > + if (unlikely(inode_unhashed(inode))) {
> > + iput(inode);
> > + goto again;
> > + }
> > }
> > }
> > return inode;
> > @@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
> > {
> > struct hlist_head *head = inode_hashtable + hash(sb, ino);
> > struct inode *inode;
> > + bool isnew;
> >
> > might_sleep();
> >
> > again:
> > - inode = find_inode_fast(sb, head, ino, false);
> > + inode = find_inode_fast(sb, head, ino, false, &isnew);
> >
> > if (inode) {
> > if (IS_ERR(inode))
> > return NULL;
> > - wait_on_inode(inode);
> > - if (unlikely(inode_unhashed(inode))) {
> > - iput(inode);
> > - goto again;
> > + if (unlikely(isnew)) {
> > + wait_on_new_inode(inode);
> > + if (unlikely(inode_unhashed(inode))) {
> > + iput(inode);
> > + goto again;
> > + }
> > }
> > }
> > return inode;
> > @@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
> > struct super_block *sb = inode->i_sb;
> > ino_t ino = inode->i_ino;
> > struct hlist_head *head = inode_hashtable + hash(sb, ino);
> > + bool isnew;
> >
> > might_sleep();
> >
> > @@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
> > return -EBUSY;
> > }
> > __iget(old);
> > + isnew = !!(inode_state_read(old) & I_NEW);
> > spin_unlock(&old->i_lock);
> > spin_unlock(&inode_hash_lock);
> > - wait_on_inode(old);
> > - if (unlikely(!inode_unhashed(old))) {
> > - iput(old);
> > - return -EBUSY;
> > + if (isnew) {
> > + wait_on_new_inode(old);
> > + if (unlikely(!inode_unhashed(old))) {
> > + iput(old);
> > + return -EBUSY;
> > + }
> > }
> > iput(old);
> > }
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 21c73df3ce75..a813abdcf218 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
> > hlist_add_fake(&inode->i_hash);
> > }
> >
> > -static inline void wait_on_inode(struct inode *inode)
> > -{
> > - wait_var_event(inode_state_wait_address(inode, __I_NEW),
> > - !(inode_state_read_once(inode) & I_NEW));
> > - /*
> > - * Pairs with routines clearing I_NEW.
> > - */
> > - smp_rmb();
> > -}
> > +void wait_on_new_inode(struct inode *inode);
> >
> > /*
> > * inode->i_rwsem nesting subclasses for the lock validator:
> > @@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
> >
> > extern struct inode *ilookup5_nowait(struct super_block *sb,
> > unsigned long hashval, int (*test)(struct inode *, void *),
> > - void *data);
> > + void *data, bool *isnew);
> > extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> > int (*test)(struct inode *, void *), void *data);
> > extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
> > --
> > 2.34.1
> >
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-10-10 22:17 [PATCH] fs: rework I_NEW handling to operate without fences Mateusz Guzik
2025-10-15 11:50 ` Mateusz Guzik
@ 2025-10-21 12:49 ` Christian Brauner
2025-10-22 9:41 ` Jan Kara
2025-11-24 17:47 ` Andreas Gruenbacher
3 siblings, 0 replies; 11+ messages in thread
From: Christian Brauner @ 2025-10-21 12:49 UTC (permalink / raw)
To: Mateusz Guzik; +Cc: Christian Brauner, viro, jack, linux-kernel, linux-fsdevel
On Sat, 11 Oct 2025 00:17:36 +0200, Mateusz Guzik wrote:
> In the inode hash code grab the state while ->i_lock is held. If found
> to be set, synchronize the sleep once more with the lock held.
>
> In the real world the flag is not set most of the time.
>
> Apart from being simpler to reason about, it comes with a minor speed up
> as now clearing the flag does not require the smp_mb() fence.
>
> [...]
Applied to the vfs-6.19.inode branch of the vfs/vfs.git tree.
Patches in the vfs-6.19.inode branch should appear in linux-next soon.
Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.
It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.
Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.
tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-6.19.inode
[1/1] fs: rework I_NEW handling to operate without fences
https://git.kernel.org/vfs/vfs/c/f5608fff035a
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-10-21 12:48 ` Christian Brauner
@ 2025-10-21 12:54 ` Mateusz Guzik
0 siblings, 0 replies; 11+ messages in thread
From: Mateusz Guzik @ 2025-10-21 12:54 UTC (permalink / raw)
To: Christian Brauner; +Cc: viro, jack, linux-kernel, linux-fsdevel
On Tue, Oct 21, 2025 at 2:48 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Oct 15, 2025 at 01:50:25PM +0200, Mateusz Guzik wrote:
> > can i get some flames on this?
>
> Ok, that looks fine to me. I don't particularly enjoy that boolean but I
> think it simplifies d_instantiate_new() enough to make up for it.
>
yes, going to sleep (or not) is lock-protected, obsoleting the need
for custom fences.
this is an incremental cleanup. I have more in the pipeline which
should dedup most of the current handling, but there is quite a bit to
clean up first and I have not decided which way I'll try to go
first.
> >
> > On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > >
> > > In the inode hash code grab the state while ->i_lock is held. If found
> > > to be set, synchronize the sleep once more with the lock held.
> > >
> > > In the real world the flag is not set most of the time.
> > >
> > > Apart from being simpler to reason about, it comes with a minor speed up
> > > as now clearing the flag does not require the smp_mb() fence.
> > >
> > > While here rename wait_on_inode() to wait_on_new_inode() to line it up
> > > with __wait_on_freeing_inode().
> > >
> > > Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> > > ---
> > >
> > > This temporarily duplicated sleep code from inode_wait_for_lru_isolating().
> > > This is going to get dedupped later.
> > >
> > > There is high repetition of:
> > > if (unlikely(isnew)) {
> > > wait_on_new_inode(old);
> > > if (unlikely(inode_unhashed(old))) {
> > > iput(old);
> > > goto again;
> > > }
> > >
> > > I expect this is going to go away after I post a patch to sanitize the
> > > current APIs for the hash.
> > >
> > >
> > > fs/afs/dir.c | 4 +-
> > > fs/dcache.c | 10 ----
> > > fs/gfs2/glock.c | 2 +-
> > > fs/inode.c | 146 +++++++++++++++++++++++++++------------------
> > > include/linux/fs.h | 12 +---
> > > 5 files changed, 93 insertions(+), 81 deletions(-)
> > >
> > > diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> > > index 89d36e3e5c79..f4e9e12373ac 100644
> > > --- a/fs/afs/dir.c
> > > +++ b/fs/afs/dir.c
> > > @@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> > > struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
> > > struct inode *inode = NULL, *ti;
> > > afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
> > > - bool supports_ibulk;
> > > + bool supports_ibulk, isnew;
> > > long ret;
> > > int i;
> > >
> > > @@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> > > * callback counters.
> > > */
> > > ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
> > > - afs_ilookup5_test_by_fid, &vp->fid);
> > > + afs_ilookup5_test_by_fid, &vp->fid, &isnew);
> > > if (!IS_ERR_OR_NULL(ti)) {
> > > vnode = AFS_FS_I(ti);
> > > vp->dv_before = vnode->status.data_version;
> > > diff --git a/fs/dcache.c b/fs/dcache.c
> > > index 78ffa7b7e824..25131f105a60 100644
> > > --- a/fs/dcache.c
> > > +++ b/fs/dcache.c
> > > @@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
> > > spin_lock(&inode->i_lock);
> > > __d_instantiate(entry, inode);
> > > WARN_ON(!(inode_state_read(inode) & I_NEW));
> > > - /*
> > > - * Pairs with smp_rmb in wait_on_inode().
> > > - */
> > > - smp_wmb();
> > > inode_state_clear(inode, I_NEW | I_CREATING);
> > > - /*
> > > - * Pairs with the barrier in prepare_to_wait_event() to make sure
> > > - * ___wait_var_event() either sees the bit cleared or
> > > - * waitqueue_active() check in wake_up_var() sees the waiter.
> > > - */
> > > - smp_mb();
> > > inode_wake_up_bit(inode, __I_NEW);
> > > spin_unlock(&inode->i_lock);
> > > }
> > > diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> > > index b677c0e6b9ab..c9712235e7a0 100644
> > > --- a/fs/gfs2/glock.c
> > > +++ b/fs/gfs2/glock.c
> > > @@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
> > > ip = NULL;
> > > spin_unlock(&gl->gl_lockref.lock);
> > > if (ip) {
> > > - wait_on_inode(&ip->i_inode);
> > > + wait_on_new_inode(&ip->i_inode);
> > > if (is_bad_inode(&ip->i_inode)) {
> > > iput(&ip->i_inode);
> > > ip = NULL;
> > > diff --git a/fs/inode.c b/fs/inode.c
> > > index 3153d725859c..1396f79b2551 100644
> > > --- a/fs/inode.c
> > > +++ b/fs/inode.c
> > > @@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
> > > }
> > > EXPORT_SYMBOL(inode_bit_waitqueue);
> > >
> > > +void wait_on_new_inode(struct inode *inode)
> > > +{
> > > + struct wait_bit_queue_entry wqe;
> > > + struct wait_queue_head *wq_head;
> > > +
> > > + spin_lock(&inode->i_lock);
> > > + if (!(inode_state_read(inode) & I_NEW)) {
> > > + spin_unlock(&inode->i_lock);
> > > + return;
> > > + }
> > > +
> > > + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> > > + for (;;) {
> > > + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> > > + if (!(inode_state_read(inode) & I_NEW))
> > > + break;
> > > + spin_unlock(&inode->i_lock);
> > > + schedule();
> > > + spin_lock(&inode->i_lock);
> > > + }
> > > + finish_wait(wq_head, &wqe.wq_entry);
> > > + WARN_ON(inode_state_read(inode) & I_NEW);
> > > + spin_unlock(&inode->i_lock);
> > > +}
> > > +EXPORT_SYMBOL(wait_on_new_inode);
> > > +
> > > /*
> > > * Add inode to LRU if needed (inode is unused and clean).
> > > *
> > > @@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
> > > static struct inode *find_inode(struct super_block *sb,
> > > struct hlist_head *head,
> > > int (*test)(struct inode *, void *),
> > > - void *data, bool is_inode_hash_locked)
> > > + void *data, bool is_inode_hash_locked,
> > > + bool *isnew)
> > > {
> > > struct inode *inode = NULL;
> > >
> > > @@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
> > > return ERR_PTR(-ESTALE);
> > > }
> > > __iget(inode);
> > > + *isnew = !!(inode_state_read(inode) & I_NEW);
> > > spin_unlock(&inode->i_lock);
> > > rcu_read_unlock();
> > > return inode;
> > > @@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
> > > */
> > > static struct inode *find_inode_fast(struct super_block *sb,
> > > struct hlist_head *head, unsigned long ino,
> > > - bool is_inode_hash_locked)
> > > + bool is_inode_hash_locked, bool *isnew)
> > > {
> > > struct inode *inode = NULL;
> > >
> > > @@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
> > > return ERR_PTR(-ESTALE);
> > > }
> > > __iget(inode);
> > > + *isnew = !!(inode_state_read(inode) & I_NEW);
> > > spin_unlock(&inode->i_lock);
> > > rcu_read_unlock();
> > > return inode;
> > > @@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
> > > lockdep_annotate_inode_mutex_key(inode);
> > > spin_lock(&inode->i_lock);
> > > WARN_ON(!(inode_state_read(inode) & I_NEW));
> > > - /*
> > > - * Pairs with smp_rmb in wait_on_inode().
> > > - */
> > > - smp_wmb();
> > > inode_state_clear(inode, I_NEW | I_CREATING);
> > > - /*
> > > - * Pairs with the barrier in prepare_to_wait_event() to make sure
> > > - * ___wait_var_event() either sees the bit cleared or
> > > - * waitqueue_active() check in wake_up_var() sees the waiter.
> > > - */
> > > - smp_mb();
>
> You're getting rid of smp_mb() because you're rechecking the flag under
> i_lock after you called prepare_to_wait_event() in wait_on_new_inode()?
>
> > > inode_wake_up_bit(inode, __I_NEW);
> > > spin_unlock(&inode->i_lock);
> > > }
> > > @@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
> > > lockdep_annotate_inode_mutex_key(inode);
> > > spin_lock(&inode->i_lock);
> > > WARN_ON(!(inode_state_read(inode) & I_NEW));
> > > - /*
> > > - * Pairs with smp_rmb in wait_on_inode().
> > > - */
> > > - smp_wmb();
> > > inode_state_clear(inode, I_NEW);
> > > - /*
> > > - * Pairs with the barrier in prepare_to_wait_event() to make sure
> > > - * ___wait_var_event() either sees the bit cleared or
> > > - * waitqueue_active() check in wake_up_var() sees the waiter.
> > > - */
> > > - smp_mb();
> > > inode_wake_up_bit(inode, __I_NEW);
> > > spin_unlock(&inode->i_lock);
> > > iput(inode);
> > > @@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> > > {
> > > struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
> > > struct inode *old;
> > > + bool isnew;
> > >
> > > might_sleep();
> > >
> > > again:
> > > spin_lock(&inode_hash_lock);
> > > - old = find_inode(inode->i_sb, head, test, data, true);
> > > + old = find_inode(inode->i_sb, head, test, data, true, &isnew);
> > > if (unlikely(old)) {
> > > /*
> > > * Uhhuh, somebody else created the same inode under us.
> > > @@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> > > spin_unlock(&inode_hash_lock);
> > > if (IS_ERR(old))
> > > return NULL;
> > > - wait_on_inode(old);
> > > - if (unlikely(inode_unhashed(old))) {
> > > - iput(old);
> > > - goto again;
> > > + if (unlikely(isnew)) {
> > > + wait_on_new_inode(old);
> > > + if (unlikely(inode_unhashed(old))) {
> > > + iput(old);
> > > + goto again;
> > > + }
> > > }
> > > return old;
> > > }
> > > @@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
> > > {
> > > struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> > > struct inode *inode, *new;
> > > + bool isnew;
> > >
> > > might_sleep();
> > >
> > > again:
> > > - inode = find_inode(sb, head, test, data, false);
> > > + inode = find_inode(sb, head, test, data, false, &isnew);
> > > if (inode) {
> > > if (IS_ERR(inode))
> > > return NULL;
> > > - wait_on_inode(inode);
> > > - if (unlikely(inode_unhashed(inode))) {
> > > - iput(inode);
> > > - goto again;
> > > + if (unlikely(isnew)) {
> > > + wait_on_new_inode(inode);
> > > + if (unlikely(inode_unhashed(inode))) {
> > > + iput(inode);
> > > + goto again;
> > > + }
> > > }
> > > return inode;
> > > }
> > > @@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> > > {
> > > struct hlist_head *head = inode_hashtable + hash(sb, ino);
> > > struct inode *inode;
> > > + bool isnew;
> > >
> > > might_sleep();
> > >
> > > again:
> > > - inode = find_inode_fast(sb, head, ino, false);
> > > + inode = find_inode_fast(sb, head, ino, false, &isnew);
> > > if (inode) {
> > > if (IS_ERR(inode))
> > > return NULL;
> > > - wait_on_inode(inode);
> > > - if (unlikely(inode_unhashed(inode))) {
> > > - iput(inode);
> > > - goto again;
> > > + if (unlikely(isnew)) {
> > > + wait_on_new_inode(inode);
> > > + if (unlikely(inode_unhashed(inode))) {
> > > + iput(inode);
> > > + goto again;
> > > + }
> > > }
> > > return inode;
> > > }
> > > @@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> > >
> > > spin_lock(&inode_hash_lock);
> > > /* We released the lock, so.. */
> > > - old = find_inode_fast(sb, head, ino, true);
> > > + old = find_inode_fast(sb, head, ino, true, &isnew);
> > > if (!old) {
> > > inode->i_ino = ino;
> > > spin_lock(&inode->i_lock);
> > > @@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> > > if (IS_ERR(old))
> > > return NULL;
> > > inode = old;
> > > - wait_on_inode(inode);
> > > - if (unlikely(inode_unhashed(inode))) {
> > > - iput(inode);
> > > - goto again;
> > > + if (unlikely(isnew)) {
> > > + wait_on_new_inode(inode);
> > > + if (unlikely(inode_unhashed(inode))) {
> > > + iput(inode);
> > > + goto again;
> > > + }
> > > }
> > > }
> > > return inode;
> > > @@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
> > > * Note2: @test is called with the inode_hash_lock held, so can't sleep.
> > > */
> > > struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
> > > - int (*test)(struct inode *, void *), void *data)
> > > + int (*test)(struct inode *, void *), void *data, bool *isnew)
> > > {
> > > struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> > > struct inode *inode;
> > >
> > > spin_lock(&inode_hash_lock);
> > > - inode = find_inode(sb, head, test, data, true);
> > > + inode = find_inode(sb, head, test, data, true, isnew);
> > > spin_unlock(&inode_hash_lock);
> > >
> > > return IS_ERR(inode) ? NULL : inode;
> > > @@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> > > int (*test)(struct inode *, void *), void *data)
> > > {
> > > struct inode *inode;
> > > + bool isnew;
> > >
> > > might_sleep();
> > >
> > > again:
> > > - inode = ilookup5_nowait(sb, hashval, test, data);
> > > + inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
> > > if (inode) {
> > > - wait_on_inode(inode);
> > > - if (unlikely(inode_unhashed(inode))) {
> > > - iput(inode);
> > > - goto again;
> > > + if (unlikely(isnew)) {
> > > + wait_on_new_inode(inode);
> > > + if (unlikely(inode_unhashed(inode))) {
> > > + iput(inode);
> > > + goto again;
> > > + }
> > > }
> > > }
> > > return inode;
> > > @@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
> > > {
> > > struct hlist_head *head = inode_hashtable + hash(sb, ino);
> > > struct inode *inode;
> > > + bool isnew;
> > >
> > > might_sleep();
> > >
> > > again:
> > > - inode = find_inode_fast(sb, head, ino, false);
> > > + inode = find_inode_fast(sb, head, ino, false, &isnew);
> > >
> > > if (inode) {
> > > if (IS_ERR(inode))
> > > return NULL;
> > > - wait_on_inode(inode);
> > > - if (unlikely(inode_unhashed(inode))) {
> > > - iput(inode);
> > > - goto again;
> > > + if (unlikely(isnew)) {
> > > + wait_on_new_inode(inode);
> > > + if (unlikely(inode_unhashed(inode))) {
> > > + iput(inode);
> > > + goto again;
> > > + }
> > > }
> > > }
> > > return inode;
> > > @@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
> > > struct super_block *sb = inode->i_sb;
> > > ino_t ino = inode->i_ino;
> > > struct hlist_head *head = inode_hashtable + hash(sb, ino);
> > > + bool isnew;
> > >
> > > might_sleep();
> > >
> > > @@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
> > > return -EBUSY;
> > > }
> > > __iget(old);
> > > + isnew = !!(inode_state_read(old) & I_NEW);
> > > spin_unlock(&old->i_lock);
> > > spin_unlock(&inode_hash_lock);
> > > - wait_on_inode(old);
> > > - if (unlikely(!inode_unhashed(old))) {
> > > - iput(old);
> > > - return -EBUSY;
> > > + if (isnew) {
> > > + wait_on_new_inode(old);
> > > + if (unlikely(!inode_unhashed(old))) {
> > > + iput(old);
> > > + return -EBUSY;
> > > + }
> > > }
> > > iput(old);
> > > }
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index 21c73df3ce75..a813abdcf218 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
> > > hlist_add_fake(&inode->i_hash);
> > > }
> > >
> > > -static inline void wait_on_inode(struct inode *inode)
> > > -{
> > > - wait_var_event(inode_state_wait_address(inode, __I_NEW),
> > > - !(inode_state_read_once(inode) & I_NEW));
> > > - /*
> > > - * Pairs with routines clearing I_NEW.
> > > - */
> > > - smp_rmb();
> > > -}
> > > +void wait_on_new_inode(struct inode *inode);
> > >
> > > /*
> > > * inode->i_rwsem nesting subclasses for the lock validator:
> > > @@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
> > >
> > > extern struct inode *ilookup5_nowait(struct super_block *sb,
> > > unsigned long hashval, int (*test)(struct inode *, void *),
> > > - void *data);
> > > + void *data, bool *isnew);
> > > extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> > > int (*test)(struct inode *, void *), void *data);
> > > extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
> > > --
> > > 2.34.1
> > >
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-10-10 22:17 [PATCH] fs: rework I_NEW handling to operate without fences Mateusz Guzik
2025-10-15 11:50 ` Mateusz Guzik
2025-10-21 12:49 ` Christian Brauner
@ 2025-10-22 9:41 ` Jan Kara
2025-11-24 17:47 ` Andreas Gruenbacher
3 siblings, 0 replies; 11+ messages in thread
From: Jan Kara @ 2025-10-22 9:41 UTC (permalink / raw)
To: Mateusz Guzik; +Cc: brauner, viro, jack, linux-kernel, linux-fsdevel
On Sat 11-10-25 00:17:36, Mateusz Guzik wrote:
> In the inode hash code grab the state while ->i_lock is held. If found
> to be set, synchronize the sleep once more with the lock held.
>
> In the real world the flag is not set most of the time.
>
> Apart from being simpler to reason about, it comes with a minor speed up
> as now clearing the flag does not require the smp_mb() fence.
>
> While here rename wait_on_inode() to wait_on_new_inode() to line it up
> with __wait_on_freeing_inode().
>
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
>
> This temporarily duplicated sleep code from inode_wait_for_lru_isolating().
> This is going to get dedupped later.
>
> There is high repetition of:
> if (unlikely(isnew)) {
> wait_on_new_inode(old);
> if (unlikely(inode_unhashed(old))) {
> iput(old);
> goto again;
> }
>
> I expect this is going to go away after I post a patch to sanitize the
> current APIs for the hash.
Yeah, it seems all but one caller (ilookup5_nowait() which is only used by
AFS) would be fine with waiting for I_NEW inodes as a part of hash lookup
similarly as we wait for I_FREEING. What AFS is doing is beyond me as it
seems to be playing weird tricks with I_NEW in afs_fetch_status_success().
Anyway with the promise of deduplication I think this is moving in a good
direction so feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-10-10 22:17 [PATCH] fs: rework I_NEW handling to operate without fences Mateusz Guzik
` (2 preceding siblings ...)
2025-10-22 9:41 ` Jan Kara
@ 2025-11-24 17:47 ` Andreas Gruenbacher
2025-11-24 19:25 ` Mateusz Guzik
3 siblings, 1 reply; 11+ messages in thread
From: Andreas Gruenbacher @ 2025-11-24 17:47 UTC (permalink / raw)
To: Mateusz Guzik
Cc: Andreas Gruenbacher, brauner, viro, jack, linux-kernel,
linux-fsdevel
On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> In the inode hash code grab the state while ->i_lock is held. If found
> to be set, synchronize the sleep once more with the lock held.
>
> In the real world the flag is not set most of the time.
>
> Apart from being simpler to reason about, it comes with a minor speed up
> as now clearing the flag does not require the smp_mb() fence.
>
> While here rename wait_on_inode() to wait_on_new_inode() to line it up
> with __wait_on_freeing_inode().
>
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
>
> This temporarily duplicated sleep code from inode_wait_for_lru_isolating().
> This is going to get dedupped later.
>
> There is high repetition of:
> if (unlikely(isnew)) {
> wait_on_new_inode(old);
> if (unlikely(inode_unhashed(old))) {
> iput(old);
> goto again;
> }
>
> I expect this is going to go away after I post a patch to sanitize the
> current APIs for the hash.
>
>
> fs/afs/dir.c | 4 +-
> fs/dcache.c | 10 ----
> fs/gfs2/glock.c | 2 +-
> fs/inode.c | 146 +++++++++++++++++++++++++++------------------
> include/linux/fs.h | 12 +---
> 5 files changed, 93 insertions(+), 81 deletions(-)
>
> diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> index 89d36e3e5c79..f4e9e12373ac 100644
> --- a/fs/afs/dir.c
> +++ b/fs/afs/dir.c
> @@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
> struct inode *inode = NULL, *ti;
> afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
> - bool supports_ibulk;
> + bool supports_ibulk, isnew;
> long ret;
> int i;
>
> @@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> * callback counters.
> */
> ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
> - afs_ilookup5_test_by_fid, &vp->fid);
> + afs_ilookup5_test_by_fid, &vp->fid, &isnew);
> if (!IS_ERR_OR_NULL(ti)) {
> vnode = AFS_FS_I(ti);
> vp->dv_before = vnode->status.data_version;
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 78ffa7b7e824..25131f105a60 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
> spin_lock(&inode->i_lock);
> __d_instantiate(entry, inode);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW | I_CREATING);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> }
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index b677c0e6b9ab..c9712235e7a0 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
> ip = NULL;
> spin_unlock(&gl->gl_lockref.lock);
> if (ip) {
> - wait_on_inode(&ip->i_inode);
> + wait_on_new_inode(&ip->i_inode);
> if (is_bad_inode(&ip->i_inode)) {
> iput(&ip->i_inode);
> ip = NULL;
> diff --git a/fs/inode.c b/fs/inode.c
> index 3153d725859c..1396f79b2551 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
> }
> EXPORT_SYMBOL(inode_bit_waitqueue);
>
> +void wait_on_new_inode(struct inode *inode)
> +{
> + struct wait_bit_queue_entry wqe;
> + struct wait_queue_head *wq_head;
> +
> + spin_lock(&inode->i_lock);
> + if (!(inode_state_read(inode) & I_NEW)) {
> + spin_unlock(&inode->i_lock);
> + return;
> + }
> +
> + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> + for (;;) {
> + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> + if (!(inode_state_read(inode) & I_NEW))
> + break;
> + spin_unlock(&inode->i_lock);
> + schedule();
> + spin_lock(&inode->i_lock);
> + }
> + finish_wait(wq_head, &wqe.wq_entry);
> + WARN_ON(inode_state_read(inode) & I_NEW);
> + spin_unlock(&inode->i_lock);
> +}
> +EXPORT_SYMBOL(wait_on_new_inode);
> +
> /*
> * Add inode to LRU if needed (inode is unused and clean).
> *
> @@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
> static struct inode *find_inode(struct super_block *sb,
> struct hlist_head *head,
> int (*test)(struct inode *, void *),
> - void *data, bool is_inode_hash_locked)
> + void *data, bool is_inode_hash_locked,
> + bool *isnew)
> {
> struct inode *inode = NULL;
>
> @@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
> return ERR_PTR(-ESTALE);
> }
> __iget(inode);
> + *isnew = !!(inode_state_read(inode) & I_NEW);
Nit: the not-nots here and in the other two places in this patch are not
doing anything. Please avoid that kind of thing.
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> return inode;
> @@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
> */
> static struct inode *find_inode_fast(struct super_block *sb,
> struct hlist_head *head, unsigned long ino,
> - bool is_inode_hash_locked)
> + bool is_inode_hash_locked, bool *isnew)
> {
> struct inode *inode = NULL;
>
> @@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
> return ERR_PTR(-ESTALE);
> }
> __iget(inode);
> + *isnew = !!(inode_state_read(inode) & I_NEW);
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> return inode;
> @@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
> lockdep_annotate_inode_mutex_key(inode);
> spin_lock(&inode->i_lock);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW | I_CREATING);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> }
> @@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
> lockdep_annotate_inode_mutex_key(inode);
> spin_lock(&inode->i_lock);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> iput(inode);
> @@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> {
> struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
> struct inode *old;
> + bool isnew;
>
> might_sleep();
>
> again:
> spin_lock(&inode_hash_lock);
> - old = find_inode(inode->i_sb, head, test, data, true);
> + old = find_inode(inode->i_sb, head, test, data, true, &isnew);
> if (unlikely(old)) {
> /*
> * Uhhuh, somebody else created the same inode under us.
> @@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> spin_unlock(&inode_hash_lock);
> if (IS_ERR(old))
> return NULL;
> - wait_on_inode(old);
> - if (unlikely(inode_unhashed(old))) {
> - iput(old);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(old);
> + if (unlikely(inode_unhashed(old))) {
> + iput(old);
> + goto again;
> + }
> }
> return old;
> }
> @@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
> {
> struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> struct inode *inode, *new;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode(sb, head, test, data, false);
> + inode = find_inode(sb, head, test, data, false, &isnew);
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> return inode;
> }
> @@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode_fast(sb, head, ino, false);
> + inode = find_inode_fast(sb, head, ino, false, &isnew);
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> return inode;
> }
> @@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>
> spin_lock(&inode_hash_lock);
> /* We released the lock, so.. */
> - old = find_inode_fast(sb, head, ino, true);
> + old = find_inode_fast(sb, head, ino, true, &isnew);
> if (!old) {
> inode->i_ino = ino;
> spin_lock(&inode->i_lock);
> @@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> if (IS_ERR(old))
> return NULL;
> inode = old;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
> * Note2: @test is called with the inode_hash_lock held, so can't sleep.
> */
> struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
> - int (*test)(struct inode *, void *), void *data)
> + int (*test)(struct inode *, void *), void *data, bool *isnew)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> struct inode *inode;
>
> spin_lock(&inode_hash_lock);
> - inode = find_inode(sb, head, test, data, true);
> + inode = find_inode(sb, head, test, data, true, isnew);
> spin_unlock(&inode_hash_lock);
>
> return IS_ERR(inode) ? NULL : inode;
> @@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> int (*test)(struct inode *, void *), void *data)
> {
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = ilookup5_nowait(sb, hashval, test, data);
> + inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
> if (inode) {
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode_fast(sb, head, ino, false);
> + inode = find_inode_fast(sb, head, ino, false, &isnew);
>
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
> struct super_block *sb = inode->i_sb;
> ino_t ino = inode->i_ino;
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> + bool isnew;
>
> might_sleep();
>
> @@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
> return -EBUSY;
> }
> __iget(old);
> + isnew = !!(inode_state_read(old) & I_NEW);
> spin_unlock(&old->i_lock);
> spin_unlock(&inode_hash_lock);
> - wait_on_inode(old);
> - if (unlikely(!inode_unhashed(old))) {
> - iput(old);
> - return -EBUSY;
> + if (isnew) {
> + wait_on_new_inode(old);
> + if (unlikely(!inode_unhashed(old))) {
> + iput(old);
> + return -EBUSY;
> + }
> }
> iput(old);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 21c73df3ce75..a813abdcf218 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
> hlist_add_fake(&inode->i_hash);
> }
>
> -static inline void wait_on_inode(struct inode *inode)
> -{
> - wait_var_event(inode_state_wait_address(inode, __I_NEW),
> - !(inode_state_read_once(inode) & I_NEW));
> - /*
> - * Pairs with routines clearing I_NEW.
> - */
> - smp_rmb();
> -}
> +void wait_on_new_inode(struct inode *inode);
>
> /*
> * inode->i_rwsem nesting subclasses for the lock validator:
> @@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
>
> extern struct inode *ilookup5_nowait(struct super_block *sb,
> unsigned long hashval, int (*test)(struct inode *, void *),
> - void *data);
> + void *data, bool *isnew);
> extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> int (*test)(struct inode *, void *), void *data);
> extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
> --
> 2.34.1
>
>
Thanks,
Andreas
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-11-24 17:47 ` Andreas Gruenbacher
@ 2025-11-24 19:25 ` Mateusz Guzik
2025-11-24 23:04 ` Andreas Gruenbacher
0 siblings, 1 reply; 11+ messages in thread
From: Mateusz Guzik @ 2025-11-24 19:25 UTC (permalink / raw)
To: Andreas Gruenbacher; +Cc: brauner, viro, jack, linux-kernel, linux-fsdevel
On Mon, Nov 24, 2025 at 6:47 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
>
> On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > + *isnew = !!(inode_state_read(inode) & I_NEW);
>
> Nit: the not-nots here and in the other two places in this patch are not
> doing anything. Please avoid that kind of thing.
>
Huh, it appears you are right. So happens I_NEW has the value of 0x1,
so I tried out another flag:
bool flagvar_de(struct inode *inode);
bool flagvar_de(struct inode *inode)
{
return !!(inode_state_read(inode) & I_CREATING);
}
EXPORT_SYMBOL(flagvar_de);
bool flagvar(struct inode *inode);
bool flagvar(struct inode *inode)
{
return inode_state_read(inode) & I_CREATING;
}
EXPORT_SYMBOL(flagvar);
endbr64
call 22c9 <flagvar+0x9>
movzbl 0x91(%rdi),%eax
shr $0x7,%al
jmp 22d8 <flagvar+0x18>
endbr64
call 699 <flagvar_de+0x9>
movzbl 0x91(%rdi),%eax
shr $0x7,%al
jmp 6a8 <flagvar_de+0x18>
Was that always a thing? My grep for '!!' shows plenty of hits in the
kernel tree and I'm pretty sure this was an established practice.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-11-24 19:25 ` Mateusz Guzik
@ 2025-11-24 23:04 ` Andreas Gruenbacher
2025-11-25 3:00 ` Mateusz Guzik
0 siblings, 1 reply; 11+ messages in thread
From: Andreas Gruenbacher @ 2025-11-24 23:04 UTC (permalink / raw)
To: Mateusz Guzik; +Cc: brauner, viro, jack, linux-kernel, linux-fsdevel
On Mon, Nov 24, 2025 at 8:25 PM Mateusz Guzik <mjguzik@gmail.com> wrote:
> On Mon, Nov 24, 2025 at 6:47 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> >
> > On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > > + *isnew = !!(inode_state_read(inode) & I_NEW);
> >
> > Nit: the not-nots here and in the other two places in this patch are not
> > doing anything. Please avoid that kind of thing.
> >
>
> Huh, it appears you are right. So happens I_NEW has the value of 0x1,
> so I tried out another flag:
>
> bool flagvar_de(struct inode *inode);
> bool flagvar_de(struct inode *inode)
> {
> return !!(inode_state_read(inode) & I_CREATING);
> }
> EXPORT_SYMBOL(flagvar_de);
>
> bool flagvar(struct inode *inode);
> bool flagvar(struct inode *inode)
> {
> return inode_state_read(inode) & I_CREATING;
> }
> EXPORT_SYMBOL(flagvar);
>
> endbr64
> call 22c9 <flagvar+0x9>
> movzbl 0x91(%rdi),%eax
> shr $0x7,%al
> jmp 22d8 <flagvar+0x18>
>
> endbr64
> call 699 <flagvar_de+0x9>
> movzbl 0x91(%rdi),%eax
> shr $0x7,%al
> jmp 6a8 <flagvar_de+0x18>
>
> Was that always a thing? My grep for '!!' shows plenty of hits in the
> kernel tree and I'm pretty sure this was an established pratice.
It depends on the data type. The not-not "operator" converts non-0
values into 1. For boolean values, that conversion is implicit. For
example,
!!0x100 == 1
(bool)0x100 == 1
but
(char)0x100 == 0
Andreas
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-11-24 23:04 ` Andreas Gruenbacher
@ 2025-11-25 3:00 ` Mateusz Guzik
2025-11-25 10:39 ` Andreas Gruenbacher
0 siblings, 1 reply; 11+ messages in thread
From: Mateusz Guzik @ 2025-11-25 3:00 UTC (permalink / raw)
To: Andreas Gruenbacher; +Cc: brauner, viro, jack, linux-kernel, linux-fsdevel
On Tue, Nov 25, 2025 at 12:04 AM Andreas Gruenbacher
<agruenba@redhat.com> wrote:
>
> On Mon, Nov 24, 2025 at 8:25 PM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > On Mon, Nov 24, 2025 at 6:47 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> > >
> > > On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > Was that always a thing? My grep for '!!' shows plenty of hits in the
> > kernel tree and I'm pretty sure this was an established practice.
>
> It depends on the data type. The not-not "operator" converts non-0
> values into 1. For boolean values, that conversion is implicit. For
> example,
>
> !!0x100 == 1
> (bool)0x100 == 1
>
> but
>
> (char)0x100 == 0
>
I mean it was an established practice *specifically* for bools.
Case in point from quick grep on the kernel:
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
{
return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU);
}
static bool
cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
{
return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
}
static bool
cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
{
return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
}
I suspect the practice predates bool support in the C standard and
people afterwards never found out.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] fs: rework I_NEW handling to operate without fences
2025-11-25 3:00 ` Mateusz Guzik
@ 2025-11-25 10:39 ` Andreas Gruenbacher
0 siblings, 0 replies; 11+ messages in thread
From: Andreas Gruenbacher @ 2025-11-25 10:39 UTC (permalink / raw)
To: Mateusz Guzik; +Cc: brauner, viro, jack, linux-kernel, linux-fsdevel
On Tue, Nov 25, 2025 at 4:01 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> On Tue, Nov 25, 2025 at 12:04 AM Andreas Gruenbacher
> <agruenba@redhat.com> wrote:
> >
> > On Mon, Nov 24, 2025 at 8:25 PM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > > On Mon, Nov 24, 2025 at 6:47 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> > > >
> > > > On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
> > > Was that always a thing? My grep for '!!' shows plenty of hits in the
> > > kernel tree and I'm pretty sure this was an established pratice.
> >
> > It depends on the data type. The non-not "operator" converts non-0
> > values into 1. For boolean values, that conversion is implicit. For
> > example,
> >
> > !!0x100 == 1
> > (bool)0x100 == 1
> >
> > but
> >
> > (char)0x100 == 0
> >
>
> I mean it was an established practice *specifically* for bools.
>
> Case in point from quick grep on the kernel:
> /* Internal helper functions to match cpu capability type */
> static bool
> cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
> {
> return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU);
> }
>
> static bool
> cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
> {
> return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
> }
>
> static bool
> cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
> {
> return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
> }
>
> I suspect the practice predates bool support in the C standard and
> people afterwards never found out.
Yes, often it's simply not needed anymore.
Andreas
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2025-11-25 10:39 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-10 22:17 [PATCH] fs: rework I_NEW handling to operate without fences Mateusz Guzik
2025-10-15 11:50 ` Mateusz Guzik
2025-10-21 12:48 ` Christian Brauner
2025-10-21 12:54 ` Mateusz Guzik
2025-10-21 12:49 ` Christian Brauner
2025-10-22 9:41 ` Jan Kara
2025-11-24 17:47 ` Andreas Gruenbacher
2025-11-24 19:25 ` Mateusz Guzik
2025-11-24 23:04 ` Andreas Gruenbacher
2025-11-25 3:00 ` Mateusz Guzik
2025-11-25 10:39 ` Andreas Gruenbacher
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).