Re: [PATCH v2] fs: make insert_inode_locked() wait for inode destruction

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Lai, Yi" <yi1.lai@linux.intel.com>
To: Mateusz Guzik <mjguzik@gmail.com>
Cc: brauner@kernel.org, viro@zeniv.linux.org.uk, jack@suse.cz,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	yi1.lai@intel.com
Subject: Re: [PATCH v2] fs: make insert_inode_locked() wait for inode destruction
Date: Fri, 13 Mar 2026 09:59:11 +0800	[thread overview]
Message-ID: <abNvb2PcrKj1FBeC@ly-workstation> (raw)
In-Reply-To: <20260114094717.236202-1-mjguzik@gmail.com>

On Wed, Jan 14, 2026 at 10:47:16AM +0100, Mateusz Guzik wrote:
> This is the only routine which instead skipped instead of waiting.
> 
> The current behavior is arguably a bug as it results in a corner case
> where the inode hash can have *two* matching inodes, one of which is on
> its way out.
> 
> Ironing out this difference is an incremental step towards sanitizing
> the API.
> 
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
> 
> v2:
> - add a way to avoid the rcu dance in __wait_on_freeing_inode
> 
> 
>  fs/inode.c | 41 ++++++++++++++++++++++++-----------------
>  1 file changed, 24 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 8a47c4da603f..a4cfe9182a7c 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1028,19 +1028,20 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
>  	return freed;
>  }
>  
> -static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
> +static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked);
> +
>  /*
>   * Called with the inode lock held.
>   */
>  static struct inode *find_inode(struct super_block *sb,
>  				struct hlist_head *head,
>  				int (*test)(struct inode *, void *),
> -				void *data, bool is_inode_hash_locked,
> +				void *data, bool hash_locked,
>  				bool *isnew)
>  {
>  	struct inode *inode = NULL;
>  
> -	if (is_inode_hash_locked)
> +	if (hash_locked)
>  		lockdep_assert_held(&inode_hash_lock);
>  	else
>  		lockdep_assert_not_held(&inode_hash_lock);
> @@ -1054,7 +1055,7 @@ static struct inode *find_inode(struct super_block *sb,
>  			continue;
>  		spin_lock(&inode->i_lock);
>  		if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
> -			__wait_on_freeing_inode(inode, is_inode_hash_locked);
> +			__wait_on_freeing_inode(inode, hash_locked, true);
>  			goto repeat;
>  		}
>  		if (unlikely(inode_state_read(inode) & I_CREATING)) {
> @@ -1078,11 +1079,11 @@ static struct inode *find_inode(struct super_block *sb,
>   */
>  static struct inode *find_inode_fast(struct super_block *sb,
>  				struct hlist_head *head, unsigned long ino,
> -				bool is_inode_hash_locked, bool *isnew)
> +				bool hash_locked, bool *isnew)
>  {
>  	struct inode *inode = NULL;
>  
> -	if (is_inode_hash_locked)
> +	if (hash_locked)
>  		lockdep_assert_held(&inode_hash_lock);
>  	else
>  		lockdep_assert_not_held(&inode_hash_lock);
> @@ -1096,7 +1097,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
>  			continue;
>  		spin_lock(&inode->i_lock);
>  		if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
> -			__wait_on_freeing_inode(inode, is_inode_hash_locked);
> +			__wait_on_freeing_inode(inode, hash_locked, true);
>  			goto repeat;
>  		}
>  		if (unlikely(inode_state_read(inode) & I_CREATING)) {
> @@ -1832,16 +1833,13 @@ int insert_inode_locked(struct inode *inode)
>  	while (1) {
>  		struct inode *old = NULL;
>  		spin_lock(&inode_hash_lock);
> +repeat:
>  		hlist_for_each_entry(old, head, i_hash) {
>  			if (old->i_ino != ino)
>  				continue;
>  			if (old->i_sb != sb)
>  				continue;
>  			spin_lock(&old->i_lock);
> -			if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) {
> -				spin_unlock(&old->i_lock);
> -				continue;
> -			}
>  			break;
>  		}
>  		if (likely(!old)) {
> @@ -1852,6 +1850,11 @@ int insert_inode_locked(struct inode *inode)
>  			spin_unlock(&inode_hash_lock);
>  			return 0;
>  		}
> +		if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) {
> +			__wait_on_freeing_inode(old, true, false);
> +			old = NULL;
> +			goto repeat;
> +		}
>  		if (unlikely(inode_state_read(old) & I_CREATING)) {
>  			spin_unlock(&old->i_lock);
>  			spin_unlock(&inode_hash_lock);
> @@ -2522,16 +2525,18 @@ EXPORT_SYMBOL(inode_needs_sync);
>   * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
>   * will DTRT.
>   */
> -static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
> +static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked)
>  {
>  	struct wait_bit_queue_entry wqe;
>  	struct wait_queue_head *wq_head;
>  
> +	VFS_BUG_ON(!hash_locked && !rcu_locked);
> +
>  	/*
>  	 * Handle racing against evict(), see that routine for more details.
>  	 */
>  	if (unlikely(inode_unhashed(inode))) {
> -		WARN_ON(is_inode_hash_locked);
> +		WARN_ON(hash_locked);
>  		spin_unlock(&inode->i_lock);
>  		return;
>  	}
> @@ -2539,14 +2544,16 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
>  	wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
>  	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
>  	spin_unlock(&inode->i_lock);
> -	rcu_read_unlock();
> -	if (is_inode_hash_locked)
> +	if (rcu_locked)
> +		rcu_read_unlock();
> +	if (hash_locked)
>  		spin_unlock(&inode_hash_lock);
>  	schedule();
>  	finish_wait(wq_head, &wqe.wq_entry);
> -	if (is_inode_hash_locked)
> +	if (hash_locked)
>  		spin_lock(&inode_hash_lock);
> -	rcu_read_lock();
> +	if (rcu_locked)
> +		rcu_read_lock();
>  }
>  
>  static __initdata unsigned long ihash_entries;
> -- 
> 2.48.1
>

Hi Mateusz Guzik,

Greetings!

I used Syzkaller and found two hung-task issues, "INFO: task hung in add_transaction_credits" and "INFO: task hung in jbd2_journal_commit_transaction", in linux-next next-20260311.

After bisection, both issues lead to the same first bad commit:
"
88ec797c4680 fs: make insert_inode_locked() wait for inode destruction
"

Detailed information for "INFO: task hung in add_transaction_credits":
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/refs/heads/main/260312_161703_add_transaction_credits/bzImage_next-20260311
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/260312_161703_add_transaction_credits/next-20260311_dmesg.log

"
[  300.335888] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  300.336427] task:systemd         state:D stack:0     pid:744   tgid:1     ppid:0      task_flags:0x400040 flags:0x00
[  300.337200] Call Trace:
[  300.337398]  <TASK>
[  300.337563]  __schedule+0xf7f/0x4910
[  300.337849]  ? __pfx___schedule+0x10/0x10
[  300.338141]  ? lock_release+0x14f/0x2d0
[  300.338473]  ? wait_transaction_locked+0x196/0x240
[  300.338829]  schedule+0xf6/0x3d0
[  300.339063]  wait_transaction_locked+0x1bb/0x240
[  300.339382]  ? wait_transaction_locked+0x196/0x240
[  300.339717]  ? __pfx_wait_transaction_locked+0x10/0x10
[  300.340074]  ? __pfx_autoremove_wake_function+0x10/0x10
[  300.340437]  add_transaction_credits+0x121/0x1000
[  300.340771]  ? check_preemption_disabled+0x1/0x180
[  300.341133]  ? __pfx_add_transaction_credits+0x10/0x10
[  300.341519]  ? __this_cpu_preempt_check+0x21/0x30
[  300.341881]  ? lock_acquire+0x1c1/0x330
[  300.342181]  ? __kasan_check_write+0x18/0x20
[  300.342541]  start_this_handle+0x457/0x1550
[  300.342887]  ? __pfx_start_this_handle+0x10/0x10
[  300.343249]  ? kasan_save_track+0x18/0x40
[  300.343560]  ? jbd2__journal_start+0x198/0x6c0
[  300.343899]  ? debug_smp_processor_id+0x20/0x30
[  300.344242]  ? rcu_is_watching+0x19/0xc0
[  300.344540]  ? kmem_cache_alloc_noprof+0x4d4/0x6c0
[  300.344916]  jbd2__journal_start+0x397/0x6c0
[  300.345256]  __ext4_journal_start_sb+0x451/0x650
[  300.345620]  ? ext4_rmdir+0x637/0xf10
[  300.345917]  ext4_rmdir+0x637/0xf10
[  300.346196]  ? __pfx_ext4_rmdir+0x10/0x10
[  300.346893]  ? sysvec_call_function_single+0x9a/0x110
[  300.347299]  ? vfs_rmdir+0x1a3/0x880
[  300.347581]  vfs_rmdir+0x351/0x880
[  300.347857]  filename_rmdir+0x3e5/0x560
[  300.348138]  ? __virt_addr_valid+0x10a/0x5f0
[  300.348443]  ? __pfx_filename_rmdir+0x10/0x10
[  300.348761]  ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
[  300.349135]  ? strncpy_from_user+0x198/0x290
[  300.349443]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[  300.349824]  ? do_getname+0x19e/0x3e0
[  300.350091]  __x64_sys_unlinkat+0x107/0x140
[  300.350390]  x64_sys_call+0x1b55/0x21c0
[  300.350690]  do_syscall_64+0xc1/0x1130
[  300.350964]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  300.351316] RIP: 0033:0x7f69c443ebab
[  300.351570] RSP: 002b:00007f69c3b33838 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
[  300.352091] RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f69c443ebab
[  300.352576] RDX: 0000000000000200 RSI: 00007f69bc008bf0 RDI: 000000000000000c
[  300.353064] RBP: 000000000000000c R08: 0000000000000003 R09: 0000000000000078
[  300.353547] R10: 000000000000009b R11: 0000000000000246 R12: 0000000000000000
[  300.354034] R13: 00007f69bc008bf0 R14: 0000000000000200 R15: 0000000000000006
[  300.354550]  </TASK>
"

Detailed information for "INFO: task hung in jbd2_journal_commit_transaction":
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/refs/heads/main/260312_123000_jbd2_journal_commit_transaction/bzImage_v7.0-rc3
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/260312_123000_jbd2_journal_commit_transaction/v7.0-rc3_dmesg.log

"
[  300.500157] INFO: task jbd2/sda-8:100 blocked for more than 147 seconds.
[  300.500574]       Not tainted 7.0.0-rc3-v7.0-rc3 #1
[  300.500876] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  300.501335] task:jbd2/sda-8      state:D stack:0     pid:100   tgid:100   ppid:2      task_flags:0x240040 flags:0x00
[  300.501988] Call Trace:
[  300.502146]  <TASK>
[  300.502289]  __schedule+0xf9f/0x4200
[  300.502526]  ? __pfx___schedule+0x10/0x10
[  300.502787]  ? lock_release+0x14f/0x2d0
[  300.503036]  schedule+0xf6/0x3d0
[  300.503248]  jbd2_journal_wait_updates+0x146/0x270
[  300.503551]  ? __pfx_jbd2_journal_wait_updates+0x10/0x10
[  300.503892]  ? __pfx_autoremove_wake_function+0x10/0x10
[  300.504212]  ? lock_release+0x14f/0x2d0
[  300.504464]  jbd2_journal_commit_transaction+0x7aa/0x6350
[  300.504827]  ? lock_is_held_type+0xef/0x150
[  300.505097]  ? __lock_acquire+0x412/0x2210
[  300.505355]  ? __lock_acquire+0x412/0x2210
[  300.505616]  ? __pfx_jbd2_journal_commit_transaction+0x10/0x10
[  300.505980]  ? do_raw_spin_lock+0x140/0x280
[  300.506244]  ? __pfx_do_raw_spin_lock+0x10/0x10
[  300.506526]  ? lock_acquire+0x1c1/0x330
[  300.506782]  ? __this_cpu_preempt_check+0x21/0x30
[  300.507069]  ? _raw_spin_unlock_irqrestore+0x35/0x70
[  300.507376]  ? lockdep_hardirqs_on+0x85/0x110
[  300.507653]  ? _raw_spin_unlock_irqrestore+0x45/0x70
[  300.507956]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[  300.508280]  ? __timer_delete_sync+0x21e/0x300
[  300.508578]  ? __this_cpu_preempt_check+0x21/0x30
[  300.508879]  ? lock_release+0x14f/0x2d0
[  300.509129]  kjournald2+0x203/0x790
[  300.509361]  ? __pfx_kjournald2+0x10/0x10
[  300.509617]  ? lockdep_hardirqs_on+0x85/0x110
[  300.509897]  ? __pfx_autoremove_wake_function+0x10/0x10
[  300.510221]  ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30
[  300.510559]  ? __kthread_parkme+0x1bc/0x260
[  300.510831]  ? __pfx_kjournald2+0x10/0x10
[  300.511089]  kthread+0x41a/0x570
[  300.511303]  ? calculate_sigpending+0x8d/0xb0
[  300.511578]  ? __pfx_kthread+0x10/0x10
[  300.511821]  ret_from_fork+0x781/0xbe0
[  300.512065]  ? __pfx_ret_from_fork+0x10/0x10
[  300.512340]  ? native_load_tls+0x16/0x50
[  300.512598]  ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
[  300.512934]  ? __switch_to+0x81e/0x1110
[  300.513178]  ? __pfx_kthread+0x10/0x10
[  300.513418]  ret_from_fork_asm+0x1a/0x30
[  300.513690]  </TASK>
"

Hope this could be insightful to you.

Regards,
Yi Lai

---

If you don't need the following environment to reproduce the problem, or if you
already have a reproduction environment, please ignore the following information.

How to reproduce:
git clone https://gitlab.com/xupengfe/repro_vm_env.git
cd repro_vm_env
tar -xvf repro_vm_env.tar.gz
cd repro_vm_env; ./start3.sh  // it needs qemu-system-x86_64 and I used v7.1.0
  // start3.sh will load bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
  // You could change the bzImage_xxx as you want
  // Maybe you need to remove line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for different qemu version
You can use the command below to log in; there is no password for root.
ssh -p 10023 root@localhost

After logging in to the VM (virtual machine) successfully, you can build the
reproducer binary, transfer it to the VM as shown below, and reproduce the problem in the VM:
gcc -pthread -o repro repro.c
scp -P 10023 repro root@localhost:/root/

Get the bzImage for target kernel:
Please use target kconfig and copy it to kernel_src/.config
make olddefconfig
make -jx bzImage           //x should be equal to or less than the number of CPUs your PC has

Set the bzImage file name in the above start3.sh to load the target kernel in the VM.


Tips:
If you already have qemu-system-x86_64, please ignore the information below.
If you want to install qemu v7.1.0:
git clone https://github.com/qemu/qemu.git
cd qemu
git checkout -f v7.1.0
mkdir build
cd build
yum install -y ninja-build.x86_64
yum -y install libslirp-devel.x86_64
../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
make
make install

next prev parent reply	other threads:[~2026-03-13  1:59 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-14  9:47 [PATCH v2] fs: make insert_inode_locked() wait for inode destruction Mateusz Guzik
2026-01-14 15:56 ` Christian Brauner
2026-01-14 17:53 ` Jan Kara
2026-03-13  1:59 ` Lai, Yi [this message]
2026-03-14  9:12   ` Mateusz Guzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=abNvb2PcrKj1FBeC@ly-workstation \
    --to=yi1.lai@linux.intel.com \
    --cc=brauner@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mjguzik@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yi1.lai@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.