From: "Lai, Yi" <yi1.lai@linux.intel.com>
To: Mateusz Guzik <mjguzik@gmail.com>
Cc: brauner@kernel.org, viro@zeniv.linux.org.uk, jack@suse.cz,
linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
yi1.lai@intel.com
Subject: Re: [PATCH v2] fs: make insert_inode_locked() wait for inode destruction
Date: Fri, 13 Mar 2026 09:59:11 +0800 [thread overview]
Message-ID: <abNvb2PcrKj1FBeC@ly-workstation> (raw)
In-Reply-To: <20260114094717.236202-1-mjguzik@gmail.com>
On Wed, Jan 14, 2026 at 10:47:16AM +0100, Mateusz Guzik wrote:
> This is the only routine which instead skipped instead of waiting.
>
> The current behavior is arguably a bug as it results in a corner case
> where the inode hash can have *two* matching inodes, one of which is on
> its way out.
>
> Ironing out this difference is an incremental step towards sanitizing
> the API.
>
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
>
> v2:
> - add a way to avoid the rcu dance in __wait_on_freeing_inode
>
>
> fs/inode.c | 41 ++++++++++++++++++++++++-----------------
> 1 file changed, 24 insertions(+), 17 deletions(-)
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 8a47c4da603f..a4cfe9182a7c 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1028,19 +1028,20 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
> return freed;
> }
>
> -static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
> +static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked);
> +
> /*
> * Called with the inode lock held.
> */
> static struct inode *find_inode(struct super_block *sb,
> struct hlist_head *head,
> int (*test)(struct inode *, void *),
> - void *data, bool is_inode_hash_locked,
> + void *data, bool hash_locked,
> bool *isnew)
> {
> struct inode *inode = NULL;
>
> - if (is_inode_hash_locked)
> + if (hash_locked)
> lockdep_assert_held(&inode_hash_lock);
> else
> lockdep_assert_not_held(&inode_hash_lock);
> @@ -1054,7 +1055,7 @@ static struct inode *find_inode(struct super_block *sb,
> continue;
> spin_lock(&inode->i_lock);
> if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
> - __wait_on_freeing_inode(inode, is_inode_hash_locked);
> + __wait_on_freeing_inode(inode, hash_locked, true);
> goto repeat;
> }
> if (unlikely(inode_state_read(inode) & I_CREATING)) {
> @@ -1078,11 +1079,11 @@ static struct inode *find_inode(struct super_block *sb,
> */
> static struct inode *find_inode_fast(struct super_block *sb,
> struct hlist_head *head, unsigned long ino,
> - bool is_inode_hash_locked, bool *isnew)
> + bool hash_locked, bool *isnew)
> {
> struct inode *inode = NULL;
>
> - if (is_inode_hash_locked)
> + if (hash_locked)
> lockdep_assert_held(&inode_hash_lock);
> else
> lockdep_assert_not_held(&inode_hash_lock);
> @@ -1096,7 +1097,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
> continue;
> spin_lock(&inode->i_lock);
> if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
> - __wait_on_freeing_inode(inode, is_inode_hash_locked);
> + __wait_on_freeing_inode(inode, hash_locked, true);
> goto repeat;
> }
> if (unlikely(inode_state_read(inode) & I_CREATING)) {
> @@ -1832,16 +1833,13 @@ int insert_inode_locked(struct inode *inode)
> while (1) {
> struct inode *old = NULL;
> spin_lock(&inode_hash_lock);
> +repeat:
> hlist_for_each_entry(old, head, i_hash) {
> if (old->i_ino != ino)
> continue;
> if (old->i_sb != sb)
> continue;
> spin_lock(&old->i_lock);
> - if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) {
> - spin_unlock(&old->i_lock);
> - continue;
> - }
> break;
> }
> if (likely(!old)) {
> @@ -1852,6 +1850,11 @@ int insert_inode_locked(struct inode *inode)
> spin_unlock(&inode_hash_lock);
> return 0;
> }
> + if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) {
> + __wait_on_freeing_inode(old, true, false);
> + old = NULL;
> + goto repeat;
> + }
> if (unlikely(inode_state_read(old) & I_CREATING)) {
> spin_unlock(&old->i_lock);
> spin_unlock(&inode_hash_lock);
> @@ -2522,16 +2525,18 @@ EXPORT_SYMBOL(inode_needs_sync);
> * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
> * will DTRT.
> */
> -static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
> +static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked)
> {
> struct wait_bit_queue_entry wqe;
> struct wait_queue_head *wq_head;
>
> + VFS_BUG_ON(!hash_locked && !rcu_locked);
> +
> /*
> * Handle racing against evict(), see that routine for more details.
> */
> if (unlikely(inode_unhashed(inode))) {
> - WARN_ON(is_inode_hash_locked);
> + WARN_ON(hash_locked);
> spin_unlock(&inode->i_lock);
> return;
> }
> @@ -2539,14 +2544,16 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
> wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> spin_unlock(&inode->i_lock);
> - rcu_read_unlock();
> - if (is_inode_hash_locked)
> + if (rcu_locked)
> + rcu_read_unlock();
> + if (hash_locked)
> spin_unlock(&inode_hash_lock);
> schedule();
> finish_wait(wq_head, &wqe.wq_entry);
> - if (is_inode_hash_locked)
> + if (hash_locked)
> spin_lock(&inode_hash_lock);
> - rcu_read_lock();
> + if (rcu_locked)
> + rcu_read_lock();
> }
>
> static __initdata unsigned long ihash_entries;
> --
> 2.48.1
>
Hi Mateusz Guzik,
Greetings!
I used Syzkaller and found that there is "INFO: task hung" in add_transaction_credits and jbd2_journal_commit_transaction in linux-next next-20260311.
After bisection and two issues lead to the same first bad commit:
"
88ec797c4680 fs: make insert_inode_locked() wait for inode destruction
"
Detailed information for "INFO: task hung in add_transaction_credits":
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/refs/heads/main/260312_161703_add_transaction_credits/bzImage_next-20260311
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/260312_161703_add_transaction_credits/next-20260311_dmesg.log
"
[ 300.335888] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 300.336427] task:systemd state:D stack:0 pid:744 tgid:1 ppid:0 task_flags:0x400040 flags:0x00
[ 300.337200] Call Trace:
[ 300.337398] <TASK>
[ 300.337563] __schedule+0xf7f/0x4910
[ 300.337849] ? __pfx___schedule+0x10/0x10
[ 300.338141] ? lock_release+0x14f/0x2d0
[ 300.338473] ? wait_transaction_locked+0x196/0x240
[ 300.338829] schedule+0xf6/0x3d0
[ 300.339063] wait_transaction_locked+0x1bb/0x240
[ 300.339382] ? wait_transaction_locked+0x196/0x240
[ 300.339717] ? __pfx_wait_transaction_locked+0x10/0x10
[ 300.340074] ? __pfx_autoremove_wake_function+0x10/0x10
[ 300.340437] add_transaction_credits+0x121/0x1000
[ 300.340771] ? check_preemption_disabled+0x1/0x180
[ 300.341133] ? __pfx_add_transaction_credits+0x10/0x10
[ 300.341519] ? __this_cpu_preempt_check+0x21/0x30
[ 300.341881] ? lock_acquire+0x1c1/0x330
[ 300.342181] ? __kasan_check_write+0x18/0x20
[ 300.342541] start_this_handle+0x457/0x1550
[ 300.342887] ? __pfx_start_this_handle+0x10/0x10
[ 300.343249] ? kasan_save_track+0x18/0x40
[ 300.343560] ? jbd2__journal_start+0x198/0x6c0
[ 300.343899] ? debug_smp_processor_id+0x20/0x30
[ 300.344242] ? rcu_is_watching+0x19/0xc0
[ 300.344540] ? kmem_cache_alloc_noprof+0x4d4/0x6c0
[ 300.344916] jbd2__journal_start+0x397/0x6c0
[ 300.345256] __ext4_journal_start_sb+0x451/0x650
[ 300.345620] ? ext4_rmdir+0x637/0xf10
[ 300.345917] ext4_rmdir+0x637/0xf10
[ 300.346196] ? __pfx_ext4_rmdir+0x10/0x10
[ 300.346893] ? sysvec_call_function_single+0x9a/0x110
[ 300.347299] ? vfs_rmdir+0x1a3/0x880
[ 300.347581] vfs_rmdir+0x351/0x880
[ 300.347857] filename_rmdir+0x3e5/0x560
[ 300.348138] ? __virt_addr_valid+0x10a/0x5f0
[ 300.348443] ? __pfx_filename_rmdir+0x10/0x10
[ 300.348761] ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
[ 300.349135] ? strncpy_from_user+0x198/0x290
[ 300.349443] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[ 300.349824] ? do_getname+0x19e/0x3e0
[ 300.350091] __x64_sys_unlinkat+0x107/0x140
[ 300.350390] x64_sys_call+0x1b55/0x21c0
[ 300.350690] do_syscall_64+0xc1/0x1130
[ 300.350964] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 300.351316] RIP: 0033:0x7f69c443ebab
[ 300.351570] RSP: 002b:00007f69c3b33838 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
[ 300.352091] RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f69c443ebab
[ 300.352576] RDX: 0000000000000200 RSI: 00007f69bc008bf0 RDI: 000000000000000c
[ 300.353064] RBP: 000000000000000c R08: 0000000000000003 R09: 0000000000000078
[ 300.353547] R10: 000000000000009b R11: 0000000000000246 R12: 0000000000000000
[ 300.354034] R13: 00007f69bc008bf0 R14: 0000000000000200 R15: 0000000000000006
[ 300.354550] </TASK>
"
Detailed information for "INFO: task hung in jbd2_journal_commit_transaction":
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/refs/heads/main/260312_123000_jbd2_journal_commit_transaction/bzImage_v7.0-rc3
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/260312_123000_jbd2_journal_commit_transaction/v7.0-rc3_dmesg.log
"
[ 300.500157] INFO: task jbd2/sda-8:100 blocked for more than 147 seconds.
[ 300.500574] Not tainted 7.0.0-rc3-v7.0-rc3 #1
[ 300.500876] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 300.501335] task:jbd2/sda-8 state:D stack:0 pid:100 tgid:100 ppid:2 task_flags:0x240040 flags:0x00
[ 300.501988] Call Trace:
[ 300.502146] <TASK>
[ 300.502289] __schedule+0xf9f/0x4200
[ 300.502526] ? __pfx___schedule+0x10/0x10
[ 300.502787] ? lock_release+0x14f/0x2d0
[ 300.503036] schedule+0xf6/0x3d0
[ 300.503248] jbd2_journal_wait_updates+0x146/0x270
[ 300.503551] ? __pfx_jbd2_journal_wait_updates+0x10/0x10
[ 300.503892] ? __pfx_autoremove_wake_function+0x10/0x10
[ 300.504212] ? lock_release+0x14f/0x2d0
[ 300.504464] jbd2_journal_commit_transaction+0x7aa/0x6350
[ 300.504827] ? lock_is_held_type+0xef/0x150
[ 300.505097] ? __lock_acquire+0x412/0x2210
[ 300.505355] ? __lock_acquire+0x412/0x2210
[ 300.505616] ? __pfx_jbd2_journal_commit_transaction+0x10/0x10
[ 300.505980] ? do_raw_spin_lock+0x140/0x280
[ 300.506244] ? __pfx_do_raw_spin_lock+0x10/0x10
[ 300.506526] ? lock_acquire+0x1c1/0x330
[ 300.506782] ? __this_cpu_preempt_check+0x21/0x30
[ 300.507069] ? _raw_spin_unlock_irqrestore+0x35/0x70
[ 300.507376] ? lockdep_hardirqs_on+0x85/0x110
[ 300.507653] ? _raw_spin_unlock_irqrestore+0x45/0x70
[ 300.507956] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[ 300.508280] ? __timer_delete_sync+0x21e/0x300
[ 300.508578] ? __this_cpu_preempt_check+0x21/0x30
[ 300.508879] ? lock_release+0x14f/0x2d0
[ 300.509129] kjournald2+0x203/0x790
[ 300.509361] ? __pfx_kjournald2+0x10/0x10
[ 300.509617] ? lockdep_hardirqs_on+0x85/0x110
[ 300.509897] ? __pfx_autoremove_wake_function+0x10/0x10
[ 300.510221] ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30
[ 300.510559] ? __kthread_parkme+0x1bc/0x260
[ 300.510831] ? __pfx_kjournald2+0x10/0x10
[ 300.511089] kthread+0x41a/0x570
[ 300.511303] ? calculate_sigpending+0x8d/0xb0
[ 300.511578] ? __pfx_kthread+0x10/0x10
[ 300.511821] ret_from_fork+0x781/0xbe0
[ 300.512065] ? __pfx_ret_from_fork+0x10/0x10
[ 300.512340] ? native_load_tls+0x16/0x50
[ 300.512598] ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
[ 300.512934] ? __switch_to+0x81e/0x1110
[ 300.513178] ? __pfx_kthread+0x10/0x10
[ 300.513418] ret_from_fork_asm+0x1a/0x30
[ 300.513690] </TASK>
"
Hope this could be insightful to you.
Regards,
Yi Lai
---
If you don't need the following environment to reproduce the problem, or if you
already have a reproduction environment set up, please ignore the following information.
How to reproduce:
git clone https://gitlab.com/xupengfe/repro_vm_env.git
cd repro_vm_env
tar -xvf repro_vm_env.tar.gz
cd repro_vm_env; ./start3.sh // it needs qemu-system-x86_64 and I used v7.1.0
// start3.sh will load bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
// You could change the bzImage_xxx as you want
// Maybe you need to remove line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for different qemu version
You could use below command to log in, there is no password for root.
ssh -p 10023 root@localhost
After logging in to the vm (virtual machine) successfully, you can transfer the
reproducer binary to the vm as shown below, and reproduce the problem in the vm:
gcc -pthread -o repro repro.c
scp -P 10023 repro root@localhost:/root/
Get the bzImage for target kernel:
Please use target kconfig and copy it to kernel_src/.config
make olddefconfig
make -jx bzImage //x should equal or less than cpu num your pc has
Fill the bzImage file into above start3.sh to load the target kernel in vm.
Tips:
If you already have qemu-system-x86_64, please ignore below info.
If you want to install qemu v7.1.0 version:
git clone https://github.com/qemu/qemu.git
cd qemu
git checkout -f v7.1.0
mkdir build
cd build
yum install -y ninja-build.x86_64
yum -y install libslirp-devel.x86_64
../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
make
make install
next prev parent reply other threads:[~2026-03-13 1:59 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-14 9:47 [PATCH v2] fs: make insert_inode_locked() wait for inode destruction Mateusz Guzik
2026-01-14 15:56 ` Christian Brauner
2026-01-14 17:53 ` Jan Kara
2026-03-13 1:59 ` Lai, Yi [this message]
2026-03-14 9:12 ` Mateusz Guzik
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=abNvb2PcrKj1FBeC@ly-workstation \
--to=yi1.lai@linux.intel.com \
--cc=brauner@kernel.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mjguzik@gmail.com \
--cc=viro@zeniv.linux.org.uk \
--cc=yi1.lai@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox