The Linux Kernel Mailing List
 help / color / mirror / Atom feed
* [PATCH] ext4: validate readdir offset before accessing dirent
@ 2026-07-03  1:51 Yao Kai
  2026-07-03  4:31 ` Zhihao Cheng
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Yao Kai @ 2026-07-03  1:51 UTC (permalink / raw)
  To: linux-ext4, tytso
  Cc: adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	linux-kernel, yi.zhang, chengzhihao1, liuyongqiang13, Yao Kai,
	syzbot+5322c5c260eb44d209ed

A corrupted directory can trigger the following KASAN report when
ext4_readdir() resumes from an invalid position:

  BUG: KASAN: use-after-free in __ext4_check_dir_entry+0x5ef/0x820
  Read of size 2 at addr ffff88810a646000 by task repro_linear/509

  Call Trace:
   <TASK>
   dump_stack_lvl+0x53/0x70
   print_report+0xd0/0x630
   kasan_report+0xce/0x100
   __ext4_check_dir_entry+0x5ef/0x820
   ext4_readdir+0xcde/0x2b70
   iterate_dir+0x1a1/0x520
   __x64_sys_getdents64+0x12b/0x220
   do_syscall_64+0xf9/0x540
   entry_SYSCALL_64_after_hwframe+0x77/0x7f
   </TASK>

KASAN reports use-after-free because the out-of-bounds access lands in an
adjacent freed page. The directory buffer itself is still referenced.

ext4_dir_llseek() invalidates the directory cookie so that ext4_readdir()
rescans directory entries from the start of the block. The rescan checks
only the lower bound of rec_len before advancing. A corrupted rec_len can
therefore place the offset where the block has insufficient space for a
complete directory entry. The rescan itself may dereference that truncated
entry, or the main loop may pass it to __ext4_check_dir_entry(). The latter
reads de->rec_len before validating the range. For example:

  block offset  0                           4092  4096
                |---- de1.rec_len = 4092 -----|----|
                                               de2.inode
                                                    | de2.rec_len
                                                    ^ OOB, reported as UAF

de2 starts at offset 4092 in this 4 KiB block. Its four-byte inode fits in
the block, but its rec_len starts at offset 4096 and crosses the boundary.

The minimum safe length is inode-dependent. Encrypted and casefolded
directory entries need eight additional hash bytes, while a valid metadata
checksum tail is only 12 bytes.

Cache the metadata checksum feature state and derive the minimum directory
entry length from the on-disk format. Use it to bound both the rescan and
the offset passed to the main loop. Report an offset in a truncated block
tail and skip the remainder of the block, while continuing to accept an
offset exactly at the block boundary.

Reported-by: syzbot+5322c5c260eb44d209ed@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=5322c5c260eb44d209ed
Fixes: ac27a0ec112a ("[PATCH] ext4: initial copy of files from ext3")
Signed-off-by: Yao Kai <yaokai34@huawei.com>
---
 fs/ext4/dir.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 17edd678fa87..5c943d18882a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -138,6 +138,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 	struct buffer_head *bh = NULL;
 	struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
 	struct dir_private_info *info = file->private_data;
+	bool has_csum = ext4_has_feature_metadata_csum(sb);
 
 	err = fscrypt_prepare_readdir(inode);
 	if (err)
@@ -149,7 +150,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 			return err;
 
 		/* Can we just clear INDEX flag to ignore htree information? */
-		if (!ext4_has_feature_metadata_csum(sb)) {
+		if (!has_csum) {
 			/*
 			 * We don't set the inode dirty flag since it's not
 			 * critical that it gets flushed back to the disk.
@@ -235,7 +236,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 		 * dirent right now.  Scan from the start of the block
 		 * to make sure. */
 		if (!inode_eq_iversion(inode, info->cookie)) {
-			for (i = 0; i < sb->s_blocksize && i < offset; ) {
+			for (i = 0;
+			     i <= sb->s_blocksize -
+				  ext4_dir_rec_len(1, has_csum ? NULL : inode) &&
+			     i < offset;) {
 				de = (struct ext4_dir_entry_2 *)
 					(bh->b_data + i);
 				/* It's too expensive to do a full
@@ -257,6 +261,17 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 			info->cookie = inode_query_iversion(inode);
 		}
 
+		if (unlikely(offset < sb->s_blocksize &&
+			     offset > sb->s_blocksize -
+			     ext4_dir_rec_len(1, has_csum ? NULL : inode))) {
+			EXT4_ERROR_FILE(file, bh->b_blocknr,
+					"bad entry in directory: %s - offset=%u, size=%lu",
+					"directory entry too close to block end",
+					offset, sb->s_blocksize);
+			ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
+			goto next_block;
+		}
+
 		while (ctx->pos < inode->i_size
 		       && offset < sb->s_blocksize) {
 			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
@@ -312,6 +327,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 			ctx->pos += ext4_rec_len_from_disk(de->rec_len,
 						sb->s_blocksize);
 		}
+next_block:
 		if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode))
 			goto done;
 		brelse(bh);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] ext4: validate readdir offset before accessing dirent
  2026-07-03  1:51 [PATCH] ext4: validate readdir offset before accessing dirent Yao Kai
@ 2026-07-03  4:31 ` Zhihao Cheng
  2026-07-03 15:43 ` Jan Kara
  2026-07-04  2:41 ` Zhang Yi
  2 siblings, 0 replies; 4+ messages in thread
From: Zhihao Cheng @ 2026-07-03  4:31 UTC (permalink / raw)
  To: Yao Kai, linux-ext4, tytso
  Cc: adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	linux-kernel, yi.zhang, liuyongqiang13,
	syzbot+5322c5c260eb44d209ed

在 2026/7/3 9:51, Yao Kai 写道:
> A corrupted directory can trigger the following KASAN report when
> ext4_readdir() resumes from an invalid position:
> 
>    BUG: KASAN: use-after-free in __ext4_check_dir_entry+0x5ef/0x820
>    Read of size 2 at addr ffff88810a646000 by task repro_linear/509
> 
>    Call Trace:
>     <TASK>
>     dump_stack_lvl+0x53/0x70
>     print_report+0xd0/0x630
>     kasan_report+0xce/0x100
>     __ext4_check_dir_entry+0x5ef/0x820
>     ext4_readdir+0xcde/0x2b70
>     iterate_dir+0x1a1/0x520
>     __x64_sys_getdents64+0x12b/0x220
>     do_syscall_64+0xf9/0x540
>     entry_SYSCALL_64_after_hwframe+0x77/0x7f
>     </TASK>
> 
> KASAN reports use-after-free because the out-of-bounds access lands in an
> adjacent freed page. The directory buffer itself is still referenced.
> 
> ext4_dir_llseek() invalidates the directory cookie so that ext4_readdir()
> rescans directory entries from the start of the block. The rescan checks
> only the lower bound of rec_len before advancing. A corrupted rec_len can
> therefore place the offset where the block has insufficient space for a
> complete directory entry. The rescan itself may dereference that truncated
> entry, or the main loop may pass it to __ext4_check_dir_entry(). The latter
> reads de->rec_len before validating the range. For example:
> 
>    block offset  0                           4092  4096
>                  |---- de1.rec_len = 4092 -----|----|
>                                                 de2.inode
>                                                      | de2.rec_len
>                                                      ^ OOB, reported as UAF
> 
> de2 starts at offset 4092 in this 4 KiB block. Its four-byte inode fits in
> the block, but its rec_len starts at offset 4096 and crosses the boundary.
> 
> The minimum safe length is inode-dependent. Encrypted and casefolded
> directory entries need eight additional hash bytes, while a valid metadata
> checksum tail is only 12 bytes.
> 
> Cache the metadata checksum feature state and derive the minimum directory
> entry length from the on-disk format. Use it to bound both the rescan and
> the offset passed to the main loop. Report an offset in a truncated block
> tail and skip the remainder of the block, while continuing to accept an
> offset exactly at the block boundary.
> 
> Reported-by: syzbot+5322c5c260eb44d209ed@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=5322c5c260eb44d209ed
> Fixes: ac27a0ec112a ("[PATCH] ext4: initial copy of files from ext3")
> Signed-off-by: Yao Kai <yaokai34@huawei.com>
> ---
>   fs/ext4/dir.c | 20 ++++++++++++++++++--
>   1 file changed, 18 insertions(+), 2 deletions(-)
> 

Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
> diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
> index 17edd678fa87..5c943d18882a 100644
> --- a/fs/ext4/dir.c
> +++ b/fs/ext4/dir.c
> @@ -138,6 +138,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   	struct buffer_head *bh = NULL;
>   	struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
>   	struct dir_private_info *info = file->private_data;
> +	bool has_csum = ext4_has_feature_metadata_csum(sb);
>   
>   	err = fscrypt_prepare_readdir(inode);
>   	if (err)
> @@ -149,7 +150,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   			return err;
>   
>   		/* Can we just clear INDEX flag to ignore htree information? */
> -		if (!ext4_has_feature_metadata_csum(sb)) {
> +		if (!has_csum) {
>   			/*
>   			 * We don't set the inode dirty flag since it's not
>   			 * critical that it gets flushed back to the disk.
> @@ -235,7 +236,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   		 * dirent right now.  Scan from the start of the block
>   		 * to make sure. */
>   		if (!inode_eq_iversion(inode, info->cookie)) {
> -			for (i = 0; i < sb->s_blocksize && i < offset; ) {
> +			for (i = 0;
> +			     i <= sb->s_blocksize -
> +				  ext4_dir_rec_len(1, has_csum ? NULL : inode) &&
> +			     i < offset;) {
>   				de = (struct ext4_dir_entry_2 *)
>   					(bh->b_data + i);
>   				/* It's too expensive to do a full
> @@ -257,6 +261,17 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   			info->cookie = inode_query_iversion(inode);
>   		}
>   
> +		if (unlikely(offset < sb->s_blocksize &&
> +			     offset > sb->s_blocksize -
> +			     ext4_dir_rec_len(1, has_csum ? NULL : inode))) {
> +			EXT4_ERROR_FILE(file, bh->b_blocknr,
> +					"bad entry in directory: %s - offset=%u, size=%lu",
> +					"directory entry too close to block end",
> +					offset, sb->s_blocksize);
> +			ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
> +			goto next_block;
> +		}
> +
>   		while (ctx->pos < inode->i_size
>   		       && offset < sb->s_blocksize) {
>   			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
> @@ -312,6 +327,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   			ctx->pos += ext4_rec_len_from_disk(de->rec_len,
>   						sb->s_blocksize);
>   		}
> +next_block:
>   		if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode))
>   			goto done;
>   		brelse(bh);
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] ext4: validate readdir offset before accessing dirent
  2026-07-03  1:51 [PATCH] ext4: validate readdir offset before accessing dirent Yao Kai
  2026-07-03  4:31 ` Zhihao Cheng
@ 2026-07-03 15:43 ` Jan Kara
  2026-07-04  2:41 ` Zhang Yi
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Kara @ 2026-07-03 15:43 UTC (permalink / raw)
  To: Yao Kai
  Cc: linux-ext4, tytso, adilger.kernel, libaokun, jack, ojaswin,
	ritesh.list, linux-kernel, yi.zhang, chengzhihao1, liuyongqiang13,
	syzbot+5322c5c260eb44d209ed

On Fri 03-07-26 09:51:06, Yao Kai wrote:
> A corrupted directory can trigger the following KASAN report when
> ext4_readdir() resumes from an invalid position:
> 
>   BUG: KASAN: use-after-free in __ext4_check_dir_entry+0x5ef/0x820
>   Read of size 2 at addr ffff88810a646000 by task repro_linear/509
> 
>   Call Trace:
>    <TASK>
>    dump_stack_lvl+0x53/0x70
>    print_report+0xd0/0x630
>    kasan_report+0xce/0x100
>    __ext4_check_dir_entry+0x5ef/0x820
>    ext4_readdir+0xcde/0x2b70
>    iterate_dir+0x1a1/0x520
>    __x64_sys_getdents64+0x12b/0x220
>    do_syscall_64+0xf9/0x540
>    entry_SYSCALL_64_after_hwframe+0x77/0x7f
>    </TASK>
> 
> KASAN reports use-after-free because the out-of-bounds access lands in an
> adjacent freed page. The directory buffer itself is still referenced.
> 
> ext4_dir_llseek() invalidates the directory cookie so that ext4_readdir()
> rescans directory entries from the start of the block. The rescan checks
> only the lower bound of rec_len before advancing. A corrupted rec_len can
> therefore place the offset where the block has insufficient space for a
> complete directory entry. The rescan itself may dereference that truncated
> entry, or the main loop may pass it to __ext4_check_dir_entry(). The latter
> reads de->rec_len before validating the range. For example:
> 
>   block offset  0                           4092  4096
>                 |---- de1.rec_len = 4092 -----|----|
>                                                de2.inode
>                                                     | de2.rec_len
>                                                     ^ OOB, reported as UAF
> 
> de2 starts at offset 4092 in this 4 KiB block. Its four-byte inode fits in
> the block, but its rec_len starts at offset 4096 and crosses the boundary.
> 
> The minimum safe length is inode-dependent. Encrypted and casefolded
> directory entries need eight additional hash bytes, while a valid metadata
> checksum tail is only 12 bytes.
> 
> Cache the metadata checksum feature state and derive the minimum directory
> entry length from the on-disk format. Use it to bound both the rescan and
> the offset passed to the main loop. Report an offset in a truncated block
> tail and skip the remainder of the block, while continuing to accept an
> offset exactly at the block boundary.
> 
> Reported-by: syzbot+5322c5c260eb44d209ed@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=5322c5c260eb44d209ed
> Fixes: ac27a0ec112a ("[PATCH] ext4: initial copy of files from ext3")
> Signed-off-by: Yao Kai <yaokai34@huawei.com>

Just one nit below. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

> @@ -257,6 +261,17 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>  			info->cookie = inode_query_iversion(inode);
>  		}
>  
> +		if (unlikely(offset < sb->s_blocksize &&
> +			     offset > sb->s_blocksize -
> +			     ext4_dir_rec_len(1, has_csum ? NULL : inode))) {
> +			EXT4_ERROR_FILE(file, bh->b_blocknr,
> +					"bad entry in directory: %s - offset=%u, size=%lu",
> +					"directory entry too close to block end",
> +					offset, sb->s_blocksize);
> +			ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
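				   ^^^ this is round_up(ctx->pos, sb->s_blocksize)
				   [note: the two forms agree only when ctx->pos is not already
				    block-aligned; that holds here because offset is non-zero]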


> +			goto next_block;
> +		}
> +
>  		while (ctx->pos < inode->i_size
>  		       && offset < sb->s_blocksize) {
>  			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] ext4: validate readdir offset before accessing dirent
  2026-07-03  1:51 [PATCH] ext4: validate readdir offset before accessing dirent Yao Kai
  2026-07-03  4:31 ` Zhihao Cheng
  2026-07-03 15:43 ` Jan Kara
@ 2026-07-04  2:41 ` Zhang Yi
  2 siblings, 0 replies; 4+ messages in thread
From: Zhang Yi @ 2026-07-04  2:41 UTC (permalink / raw)
  To: Yao Kai, linux-ext4, tytso
  Cc: adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	linux-kernel, yi.zhang, chengzhihao1, liuyongqiang13,
	syzbot+5322c5c260eb44d209ed

On 7/3/2026 9:51 AM, Yao Kai wrote:
> A corrupted directory can trigger the following KASAN report when
> ext4_readdir() resumes from an invalid position:
> 
>    BUG: KASAN: use-after-free in __ext4_check_dir_entry+0x5ef/0x820
>    Read of size 2 at addr ffff88810a646000 by task repro_linear/509
> 
>    Call Trace:
>     <TASK>
>     dump_stack_lvl+0x53/0x70
>     print_report+0xd0/0x630
>     kasan_report+0xce/0x100
>     __ext4_check_dir_entry+0x5ef/0x820
>     ext4_readdir+0xcde/0x2b70
>     iterate_dir+0x1a1/0x520
>     __x64_sys_getdents64+0x12b/0x220
>     do_syscall_64+0xf9/0x540
>     entry_SYSCALL_64_after_hwframe+0x77/0x7f
>     </TASK>
> 
> KASAN reports use-after-free because the out-of-bounds access lands in an
> adjacent freed page. The directory buffer itself is still referenced.
> 
> ext4_dir_llseek() invalidates the directory cookie so that ext4_readdir()
> rescans directory entries from the start of the block. The rescan checks
> only the lower bound of rec_len before advancing. A corrupted rec_len can
> therefore place the offset where the block has insufficient space for a
> complete directory entry. The rescan itself may dereference that truncated
> entry, or the main loop may pass it to __ext4_check_dir_entry(). The latter
> reads de->rec_len before validating the range. For example:
> 
>    block offset  0                           4092  4096
>                  |---- de1.rec_len = 4092 -----|----|
>                                                 de2.inode
>                                                      | de2.rec_len
>                                                      ^ OOB, reported as UAF
> 
> de2 starts at offset 4092 in this 4 KiB block. Its four-byte inode fits in
> the block, but its rec_len starts at offset 4096 and crosses the boundary.
> 
> The minimum safe length is inode-dependent. Encrypted and casefolded
> directory entries need eight additional hash bytes, while a valid metadata
> checksum tail is only 12 bytes.
> 
> Cache the metadata checksum feature state and derive the minimum directory
> entry length from the on-disk format. Use it to bound both the rescan and
> the offset passed to the main loop. Report an offset in a truncated block
> tail and skip the remainder of the block, while continuing to accept an
> offset exactly at the block boundary.
> 
> Reported-by: syzbot+5322c5c260eb44d209ed@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=5322c5c260eb44d209ed
> Fixes: ac27a0ec112a ("[PATCH] ext4: initial copy of files from ext3")
> Signed-off-by: Yao Kai <yaokai34@huawei.com>

Thanks for the fix patch. With Jan's suggestion incorporated, this looks
good to me.

Reviewed-by: Zhang Yi <yi.zhang@huawei.com>

> ---
>   fs/ext4/dir.c | 20 ++++++++++++++++++--
>   1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
> index 17edd678fa87..5c943d18882a 100644
> --- a/fs/ext4/dir.c
> +++ b/fs/ext4/dir.c
> @@ -138,6 +138,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   	struct buffer_head *bh = NULL;
>   	struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
>   	struct dir_private_info *info = file->private_data;
> +	bool has_csum = ext4_has_feature_metadata_csum(sb);
>   
>   	err = fscrypt_prepare_readdir(inode);
>   	if (err)
> @@ -149,7 +150,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   			return err;
>   
>   		/* Can we just clear INDEX flag to ignore htree information? */
> -		if (!ext4_has_feature_metadata_csum(sb)) {
> +		if (!has_csum) {
>   			/*
>   			 * We don't set the inode dirty flag since it's not
>   			 * critical that it gets flushed back to the disk.
> @@ -235,7 +236,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   		 * dirent right now.  Scan from the start of the block
>   		 * to make sure. */
>   		if (!inode_eq_iversion(inode, info->cookie)) {
> -			for (i = 0; i < sb->s_blocksize && i < offset; ) {
> +			for (i = 0;
> +			     i <= sb->s_blocksize -
> +				  ext4_dir_rec_len(1, has_csum ? NULL : inode) &&
> +			     i < offset;) {
>   				de = (struct ext4_dir_entry_2 *)
>   					(bh->b_data + i);
>   				/* It's too expensive to do a full
> @@ -257,6 +261,17 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   			info->cookie = inode_query_iversion(inode);
>   		}
>   
> +		if (unlikely(offset < sb->s_blocksize &&
> +			     offset > sb->s_blocksize -
> +			     ext4_dir_rec_len(1, has_csum ? NULL : inode))) {
> +			EXT4_ERROR_FILE(file, bh->b_blocknr,
> +					"bad entry in directory: %s - offset=%u, size=%lu",
> +					"directory entry too close to block end",
> +					offset, sb->s_blocksize);
> +			ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
> +			goto next_block;
> +		}
> +
>   		while (ctx->pos < inode->i_size
>   		       && offset < sb->s_blocksize) {
>   			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
> @@ -312,6 +327,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
>   			ctx->pos += ext4_rec_len_from_disk(de->rec_len,
>   						sb->s_blocksize);
>   		}
> +next_block:
>   		if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode))
>   			goto done;
>   		brelse(bh);


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-07-04  2:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-07-03  1:51 [PATCH] ext4: validate readdir offset before accessing dirent Yao Kai
2026-07-03  4:31 ` Zhihao Cheng
2026-07-03 15:43 ` Jan Kara
2026-07-04  2:41 ` Zhang Yi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox