Re: [PATCH v2] fs: make insert_inode_locked() wait for inode destruction

public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed

From: "Lai, Yi" <yi1.lai@linux.intel.com>
To: Mateusz Guzik <mjguzik@gmail.com>
Cc: brauner@kernel.org, viro@zeniv.linux.org.uk, jack@suse.cz,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	yi1.lai@intel.com
Subject: Re: [PATCH v2] fs: make insert_inode_locked() wait for inode destruction
Date: Fri, 13 Mar 2026 09:59:11 +0800	[thread overview]
Message-ID: <abNvb2PcrKj1FBeC@ly-workstation> (raw)
In-Reply-To: <20260114094717.236202-1-mjguzik@gmail.com>

On Wed, Jan 14, 2026 at 10:47:16AM +0100, Mateusz Guzik wrote:
> This is the only routine which skipped instead of waiting.
> 
> The current behavior is arguably a bug as it results in a corner case
> where the inode hash can have *two* matching inodes, one of which is on
> its way out.
> 
> Ironing out this difference is an incremental step towards sanitizing
> the API.
> 
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
> 
> v2:
> - add a way to avoid the rcu dance in __wait_on_freeing_inode
> 
> 
>  fs/inode.c | 41 ++++++++++++++++++++++++-----------------
>  1 file changed, 24 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 8a47c4da603f..a4cfe9182a7c 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1028,19 +1028,20 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
>  	return freed;
>  }
>  
> -static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
> +static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked);
> +
>  /*
>   * Called with the inode lock held.
>   */
>  static struct inode *find_inode(struct super_block *sb,
>  				struct hlist_head *head,
>  				int (*test)(struct inode *, void *),
> -				void *data, bool is_inode_hash_locked,
> +				void *data, bool hash_locked,
>  				bool *isnew)
>  {
>  	struct inode *inode = NULL;
>  
> -	if (is_inode_hash_locked)
> +	if (hash_locked)
>  		lockdep_assert_held(&inode_hash_lock);
>  	else
>  		lockdep_assert_not_held(&inode_hash_lock);
> @@ -1054,7 +1055,7 @@ static struct inode *find_inode(struct super_block *sb,
>  			continue;
>  		spin_lock(&inode->i_lock);
>  		if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
> -			__wait_on_freeing_inode(inode, is_inode_hash_locked);
> +			__wait_on_freeing_inode(inode, hash_locked, true);
>  			goto repeat;
>  		}
>  		if (unlikely(inode_state_read(inode) & I_CREATING)) {
> @@ -1078,11 +1079,11 @@ static struct inode *find_inode(struct super_block *sb,
>   */
>  static struct inode *find_inode_fast(struct super_block *sb,
>  				struct hlist_head *head, unsigned long ino,
> -				bool is_inode_hash_locked, bool *isnew)
> +				bool hash_locked, bool *isnew)
>  {
>  	struct inode *inode = NULL;
>  
> -	if (is_inode_hash_locked)
> +	if (hash_locked)
>  		lockdep_assert_held(&inode_hash_lock);
>  	else
>  		lockdep_assert_not_held(&inode_hash_lock);
> @@ -1096,7 +1097,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
>  			continue;
>  		spin_lock(&inode->i_lock);
>  		if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE)) {
> -			__wait_on_freeing_inode(inode, is_inode_hash_locked);
> +			__wait_on_freeing_inode(inode, hash_locked, true);
>  			goto repeat;
>  		}
>  		if (unlikely(inode_state_read(inode) & I_CREATING)) {
> @@ -1832,16 +1833,13 @@ int insert_inode_locked(struct inode *inode)
>  	while (1) {
>  		struct inode *old = NULL;
>  		spin_lock(&inode_hash_lock);
> +repeat:
>  		hlist_for_each_entry(old, head, i_hash) {
>  			if (old->i_ino != ino)
>  				continue;
>  			if (old->i_sb != sb)
>  				continue;
>  			spin_lock(&old->i_lock);
> -			if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) {
> -				spin_unlock(&old->i_lock);
> -				continue;
> -			}
>  			break;
>  		}
>  		if (likely(!old)) {
> @@ -1852,6 +1850,11 @@ int insert_inode_locked(struct inode *inode)
>  			spin_unlock(&inode_hash_lock);
>  			return 0;
>  		}
> +		if (inode_state_read(old) & (I_FREEING | I_WILL_FREE)) {
> +			__wait_on_freeing_inode(old, true, false);
> +			old = NULL;
> +			goto repeat;
> +		}
>  		if (unlikely(inode_state_read(old) & I_CREATING)) {
>  			spin_unlock(&old->i_lock);
>  			spin_unlock(&inode_hash_lock);
> @@ -2522,16 +2525,18 @@ EXPORT_SYMBOL(inode_needs_sync);
>   * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
>   * will DTRT.
>   */
> -static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
> +static void __wait_on_freeing_inode(struct inode *inode, bool hash_locked, bool rcu_locked)
>  {
>  	struct wait_bit_queue_entry wqe;
>  	struct wait_queue_head *wq_head;
>  
> +	VFS_BUG_ON(!hash_locked && !rcu_locked);
> +
>  	/*
>  	 * Handle racing against evict(), see that routine for more details.
>  	 */
>  	if (unlikely(inode_unhashed(inode))) {
> -		WARN_ON(is_inode_hash_locked);
> +		WARN_ON(hash_locked);
>  		spin_unlock(&inode->i_lock);
>  		return;
>  	}
> @@ -2539,14 +2544,16 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
>  	wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
>  	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
>  	spin_unlock(&inode->i_lock);
> -	rcu_read_unlock();
> -	if (is_inode_hash_locked)
> +	if (rcu_locked)
> +		rcu_read_unlock();
> +	if (hash_locked)
>  		spin_unlock(&inode_hash_lock);
>  	schedule();
>  	finish_wait(wq_head, &wqe.wq_entry);
> -	if (is_inode_hash_locked)
> +	if (hash_locked)
>  		spin_lock(&inode_hash_lock);
> -	rcu_read_lock();
> +	if (rcu_locked)
> +		rcu_read_lock();
>  }
>  
>  static __initdata unsigned long ihash_entries;
> -- 
> 2.48.1
>

Hi Mateusz Guzik,

Greetings!

I used Syzkaller and found that there is "INFO: task hung" in add_transaction_credits and jbd2_journal_commit_transaction in linux-next next-20260311.

After bisection, the two issues lead to the same first bad commit:
"
88ec797c4680 fs: make insert_inode_locked() wait for inode destruction
"

Detailed information for "there is INFO: task hung in add_transaction_credits":
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_161703_add_transaction_credits/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/refs/heads/main/260312_161703_add_transaction_credits/bzImage_next-20260311
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/260312_161703_add_transaction_credits/next-20260311_dmesg.log

"
[  300.335888] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  300.336427] task:systemd         state:D stack:0     pid:744   tgid:1     ppid:0      task_flags:0x400040 flags:0x00
[  300.337200] Call Trace:
[  300.337398]  <TASK>
[  300.337563]  __schedule+0xf7f/0x4910
[  300.337849]  ? __pfx___schedule+0x10/0x10
[  300.338141]  ? lock_release+0x14f/0x2d0
[  300.338473]  ? wait_transaction_locked+0x196/0x240
[  300.338829]  schedule+0xf6/0x3d0
[  300.339063]  wait_transaction_locked+0x1bb/0x240
[  300.339382]  ? wait_transaction_locked+0x196/0x240
[  300.339717]  ? __pfx_wait_transaction_locked+0x10/0x10
[  300.340074]  ? __pfx_autoremove_wake_function+0x10/0x10
[  300.340437]  add_transaction_credits+0x121/0x1000
[  300.340771]  ? check_preemption_disabled+0x1/0x180
[  300.341133]  ? __pfx_add_transaction_credits+0x10/0x10
[  300.341519]  ? __this_cpu_preempt_check+0x21/0x30
[  300.341881]  ? lock_acquire+0x1c1/0x330
[  300.342181]  ? __kasan_check_write+0x18/0x20
[  300.342541]  start_this_handle+0x457/0x1550
[  300.342887]  ? __pfx_start_this_handle+0x10/0x10
[  300.343249]  ? kasan_save_track+0x18/0x40
[  300.343560]  ? jbd2__journal_start+0x198/0x6c0
[  300.343899]  ? debug_smp_processor_id+0x20/0x30
[  300.344242]  ? rcu_is_watching+0x19/0xc0
[  300.344540]  ? kmem_cache_alloc_noprof+0x4d4/0x6c0
[  300.344916]  jbd2__journal_start+0x397/0x6c0
[  300.345256]  __ext4_journal_start_sb+0x451/0x650
[  300.345620]  ? ext4_rmdir+0x637/0xf10
[  300.345917]  ext4_rmdir+0x637/0xf10
[  300.346196]  ? __pfx_ext4_rmdir+0x10/0x10
[  300.346893]  ? sysvec_call_function_single+0x9a/0x110
[  300.347299]  ? vfs_rmdir+0x1a3/0x880
[  300.347581]  vfs_rmdir+0x351/0x880
[  300.347857]  filename_rmdir+0x3e5/0x560
[  300.348138]  ? __virt_addr_valid+0x10a/0x5f0
[  300.348443]  ? __pfx_filename_rmdir+0x10/0x10
[  300.348761]  ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
[  300.349135]  ? strncpy_from_user+0x198/0x290
[  300.349443]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[  300.349824]  ? do_getname+0x19e/0x3e0
[  300.350091]  __x64_sys_unlinkat+0x107/0x140
[  300.350390]  x64_sys_call+0x1b55/0x21c0
[  300.350690]  do_syscall_64+0xc1/0x1130
[  300.350964]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  300.351316] RIP: 0033:0x7f69c443ebab
[  300.351570] RSP: 002b:00007f69c3b33838 EFLAGS: 00000246 ORIG_RAX: 0000000000000107
[  300.352091] RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f69c443ebab
[  300.352576] RDX: 0000000000000200 RSI: 00007f69bc008bf0 RDI: 000000000000000c
[  300.353064] RBP: 000000000000000c R08: 0000000000000003 R09: 0000000000000078
[  300.353547] R10: 000000000000009b R11: 0000000000000246 R12: 0000000000000000
[  300.354034] R13: 00007f69bc008bf0 R14: 0000000000000200 R15: 0000000000000006
[  300.354550]  </TASK>
"

Detailed information for "there is INFO: task hung in jbd2_journal_commit_transaction":
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/tree/main/260312_123000_jbd2_journal_commit_transaction/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/refs/heads/main/260312_123000_jbd2_journal_commit_transaction/bzImage_v7.0-rc3
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/260312_123000_jbd2_journal_commit_transaction/v7.0-rc3_dmesg.log

"
[  300.500157] INFO: task jbd2/sda-8:100 blocked for more than 147 seconds.
[  300.500574]       Not tainted 7.0.0-rc3-v7.0-rc3 #1
[  300.500876] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  300.501335] task:jbd2/sda-8      state:D stack:0     pid:100   tgid:100   ppid:2      task_flags:0x240040 flags:0x00
[  300.501988] Call Trace:
[  300.502146]  <TASK>
[  300.502289]  __schedule+0xf9f/0x4200
[  300.502526]  ? __pfx___schedule+0x10/0x10
[  300.502787]  ? lock_release+0x14f/0x2d0
[  300.503036]  schedule+0xf6/0x3d0
[  300.503248]  jbd2_journal_wait_updates+0x146/0x270
[  300.503551]  ? __pfx_jbd2_journal_wait_updates+0x10/0x10
[  300.503892]  ? __pfx_autoremove_wake_function+0x10/0x10
[  300.504212]  ? lock_release+0x14f/0x2d0
[  300.504464]  jbd2_journal_commit_transaction+0x7aa/0x6350
[  300.504827]  ? lock_is_held_type+0xef/0x150
[  300.505097]  ? __lock_acquire+0x412/0x2210
[  300.505355]  ? __lock_acquire+0x412/0x2210
[  300.505616]  ? __pfx_jbd2_journal_commit_transaction+0x10/0x10
[  300.505980]  ? do_raw_spin_lock+0x140/0x280
[  300.506244]  ? __pfx_do_raw_spin_lock+0x10/0x10
[  300.506526]  ? lock_acquire+0x1c1/0x330
[  300.506782]  ? __this_cpu_preempt_check+0x21/0x30
[  300.507069]  ? _raw_spin_unlock_irqrestore+0x35/0x70
[  300.507376]  ? lockdep_hardirqs_on+0x85/0x110
[  300.507653]  ? _raw_spin_unlock_irqrestore+0x45/0x70
[  300.507956]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[  300.508280]  ? __timer_delete_sync+0x21e/0x300
[  300.508578]  ? __this_cpu_preempt_check+0x21/0x30
[  300.508879]  ? lock_release+0x14f/0x2d0
[  300.509129]  kjournald2+0x203/0x790
[  300.509361]  ? __pfx_kjournald2+0x10/0x10
[  300.509617]  ? lockdep_hardirqs_on+0x85/0x110
[  300.509897]  ? __pfx_autoremove_wake_function+0x10/0x10
[  300.510221]  ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30
[  300.510559]  ? __kthread_parkme+0x1bc/0x260
[  300.510831]  ? __pfx_kjournald2+0x10/0x10
[  300.511089]  kthread+0x41a/0x570
[  300.511303]  ? calculate_sigpending+0x8d/0xb0
[  300.511578]  ? __pfx_kthread+0x10/0x10
[  300.511821]  ret_from_fork+0x781/0xbe0
[  300.512065]  ? __pfx_ret_from_fork+0x10/0x10
[  300.512340]  ? native_load_tls+0x16/0x50
[  300.512598]  ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
[  300.512934]  ? __switch_to+0x81e/0x1110
[  300.513178]  ? __pfx_kthread+0x10/0x10
[  300.513418]  ret_from_fork_asm+0x1a/0x30
[  300.513690]  </TASK>
"

Hope this could be insightful to you.

Regards,
Yi Lai

---

If you don't need the following environment to reproduce the problem, or if you
already have a reproduction environment, please ignore the following information.

How to reproduce:
git clone https://gitlab.com/xupengfe/repro_vm_env.git
cd repro_vm_env
tar -xvf repro_vm_env.tar.gz
cd repro_vm_env; ./start3.sh  // it needs qemu-system-x86_64 and I used v7.1.0
  // start3.sh will load bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
  // You could change the bzImage_xxx as you want
  // Maybe you need to remove line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for different qemu version
You can use the command below to log in; there is no password for root.
ssh -p 10023 root@localhost

After logging in to the vm (virtual machine) successfully, you can transfer the
reproducer binary to the vm as shown below, and reproduce the problem in the vm:
gcc -pthread -o repro repro.c
scp -P 10023 repro root@localhost:/root/

Get the bzImage for target kernel:
Please use target kconfig and copy it to kernel_src/.config
make olddefconfig
make -jx bzImage           // x should be equal to or less than the number of CPUs your PC has

Put the bzImage file name into start3.sh above to load the target kernel in the vm.


Tips:
If you already have qemu-system-x86_64, please ignore the info below.
If you want to install qemu v7.1.0:
git clone https://github.com/qemu/qemu.git
cd qemu
git checkout -f v7.1.0
mkdir build
cd build
yum install -y ninja-build.x86_64
yum -y install libslirp-devel.x86_64
../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
make
make install

next prev parent reply	other threads:[~2026-03-13  1:59 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-14  9:47 [PATCH v2] fs: make insert_inode_locked() wait for inode destruction Mateusz Guzik
2026-01-14 15:56 ` Christian Brauner
2026-01-14 17:53 ` Jan Kara
2026-03-13  1:59 ` Lai, Yi [this message]
2026-03-14  9:12   ` Mateusz Guzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=abNvb2PcrKj1FBeC@ly-workstation \
    --to=yi1.lai@linux.intel.com \
    --cc=brauner@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mjguzik@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yi1.lai@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox