All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jaegeuk Kim via Linux-f2fs-devel <linux-f2fs-devel@lists.sourceforge.net>
To: Daniel Lee <chullee@google.com>
Cc: linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net
Subject: Re: [f2fs-dev] [PATCH v2] f2fs: Introduce linear search for dentries
Date: Fri, 20 Dec 2024 21:00:19 +0000	[thread overview]
Message-ID: <Z2Xa40v15NAewl7b@google.com> (raw)
In-Reply-To: <20241220172136.1028811-1-chullee@google.com>

On 12/20, Daniel Lee wrote:
> This patch addresses an issue where some files in case-insensitive
> directories become inaccessible due to changes in how the kernel function,
> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
> ("unicode: Don't special case ignorable code points").
> 
> F2FS uses these case-folded names to calculate hash values for locating
> dentries and stores them on disk. Since utf8_casefold() can produce
> different output across kernel versions, stored hash values and newly
> calculated hash values may differ. This results in affected files no
> longer being found via the hash-based lookup.
> 
> To resolve this, the patch introduces a linear search fallback.
> If the initial hash-based search fails, F2FS will sequentially scan the
> directory entries.
> 
> Fixes: 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points")
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
> Signed-off-by: Daniel Lee <chullee@google.com>
> ---
> v2:
> - Only update chash if use_hash is true
>  fs/f2fs/dir.c    | 40 +++++++++++++++++++++++++++++-----------
>  fs/f2fs/f2fs.h   |  6 ++++--
>  fs/f2fs/inline.c |  5 +++--
>  3 files changed, 36 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index 47a5c806cf16..3e8e5ddf9dbc 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
>  static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>  				struct page *dentry_page,
>  				const struct f2fs_filename *fname,
> -				int *max_slots)
> +				int *max_slots,
> +				bool use_hash)
>  {
>  	struct f2fs_dentry_block *dentry_blk;
>  	struct f2fs_dentry_ptr d;
> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>  	dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
>  
>  	make_dentry_ptr_block(dir, &d, dentry_blk);
> -	return f2fs_find_target_dentry(&d, fname, max_slots);
> +	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
>  }
>  
>  static inline int f2fs_match_name(const struct inode *dir,
> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
>  }
>  
>  struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> -			const struct f2fs_filename *fname, int *max_slots)
> +			const struct f2fs_filename *fname, int *max_slots,
> +			bool use_hash)
>  {
>  	struct f2fs_dir_entry *de;
>  	unsigned long bit_pos = 0;
> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>  			continue;
>  		}
>  
> -		if (de->hash_code == fname->hash) {
> +		if (!use_hash || de->hash_code == fname->hash) {
>  			res = f2fs_match_name(d->inode, fname,
>  					      d->filename[bit_pos],
>  					      le16_to_cpu(de->name_len));
> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  					unsigned int level,
>  					const struct f2fs_filename *fname,
> -					struct page **res_page)
> +					struct page **res_page,
> +					bool use_hash)
>  {
>  	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
>  	unsigned int nbucket, nblock;
> -	unsigned int bidx, end_block;
> +	unsigned int bidx, end_block, bucket_no;
>  	struct page *dentry_page;
>  	struct f2fs_dir_entry *de = NULL;
>  	pgoff_t next_pgofs;
> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
>  	nblock = bucket_blocks(level);
>  
> +	bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
> +
> +start_find_bucket:
>  	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
> -			       le32_to_cpu(fname->hash) % nbucket);
> +			       bucket_no);
>  	end_block = bidx + nblock;
>  
>  	while (bidx < end_block) {
> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  			}
>  		}
>  
> -		de = find_in_block(dir, dentry_page, fname, &max_slots);
> +		de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
>  		if (IS_ERR(de)) {
>  			*res_page = ERR_CAST(de);
>  			de = NULL;
> @@ -307,7 +313,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  		bidx++;
>  	}
>  
> -	if (!de && room && F2FS_I(dir)->chash != fname->hash) {
> +	if (!use_hash && !de && ++bucket_no < nbucket)
> +		goto start_find_bucket;
> +
> +	if (use_hash && !de && room && F2FS_I(dir)->chash != fname->hash) {
>  		F2FS_I(dir)->chash = fname->hash;
>  		F2FS_I(dir)->clevel = level;
>  	}
> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>  	struct f2fs_dir_entry *de = NULL;
>  	unsigned int max_depth;
>  	unsigned int level;
> +	bool use_hash = true;
>  
>  	*res_page = NULL;
>  
> +start_find_entry:

This label is only used by #if IS_ENABLED(CONFIG_UNICODE).

>  	if (f2fs_has_inline_dentry(dir)) {
> -		de = f2fs_find_in_inline_dir(dir, fname, res_page);
> +		de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
>  		goto out;
>  	}
>  
> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>  	}
>  
>  	for (level = 0; level < max_depth; level++) {
> -		de = find_in_level(dir, level, fname, res_page);
> +		de = find_in_level(dir, level, fname, res_page, use_hash);
>  		if (de || IS_ERR(*res_page))
>  			break;
>  	}
> +
>  out:
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (IS_CASEFOLDED(dir) && !de && use_hash) {
> +		use_hash = false;
> +		goto start_find_entry;
> +	}
> +#endif
>  	/* This is to increase the speed of f2fs_create */
>  	if (!de)
>  		F2FS_I(dir)->task = current;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f523dd302bf6..1afebb9c4061 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
>  			struct f2fs_filename *fname);
>  void f2fs_free_filename(struct f2fs_filename *fname);
>  struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> -			const struct f2fs_filename *fname, int *max_slots);
> +			const struct f2fs_filename *fname, int *max_slots,
> +			bool use_hash);
>  int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
>  			unsigned int start_pos, struct fscrypt_str *fstr);
>  void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
>  int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
>  struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>  					const struct f2fs_filename *fname,
> -					struct page **res_page);
> +					struct page **res_page,
> +					bool use_hash);
>  int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>  			struct page *ipage);
>  int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> index cbd2a0d34804..3e3c35d4c98b 100644
> --- a/fs/f2fs/inline.c
> +++ b/fs/f2fs/inline.c
> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>  
>  struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>  					const struct f2fs_filename *fname,
> -					struct page **res_page)
> +					struct page **res_page,
> +					bool use_hash)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
>  	struct f2fs_dir_entry *de;
> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>  	inline_dentry = inline_data_addr(dir, ipage);
>  
>  	make_dentry_ptr_inline(dir, &d, inline_dentry);
> -	de = f2fs_find_target_dentry(&d, fname, NULL);
> +	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
>  	unlock_page(ipage);
>  	if (IS_ERR(de)) {
>  		*res_page = ERR_CAST(de);
> -- 
> 2.47.1.613.gc27f4b7a9f-goog


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

WARNING: multiple messages have this Message-ID (diff)
From: Jaegeuk Kim <jaegeuk@kernel.org>
To: Daniel Lee <chullee@google.com>
Cc: Chao Yu <chao@kernel.org>,
	linux-f2fs-devel@lists.sourceforge.net,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2] f2fs: Introduce linear search for dentries
Date: Fri, 20 Dec 2024 21:00:19 +0000	[thread overview]
Message-ID: <Z2Xa40v15NAewl7b@google.com> (raw)
In-Reply-To: <20241220172136.1028811-1-chullee@google.com>

On 12/20, Daniel Lee wrote:
> This patch addresses an issue where some files in case-insensitive
> directories become inaccessible due to changes in how the kernel function,
> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
> ("unicode: Don't special case ignorable code points").
> 
> F2FS uses these case-folded names to calculate hash values for locating
> dentries and stores them on disk. Since utf8_casefold() can produce
> different output across kernel versions, stored hash values and newly
> calculated hash values may differ. This results in affected files no
> longer being found via the hash-based lookup.
> 
> To resolve this, the patch introduces a linear search fallback.
> If the initial hash-based search fails, F2FS will sequentially scan the
> directory entries.
> 
> Fixes: 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points")
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
> Signed-off-by: Daniel Lee <chullee@google.com>
> ---
> v2:
> - Only update chash if use_hash is true
>  fs/f2fs/dir.c    | 40 +++++++++++++++++++++++++++++-----------
>  fs/f2fs/f2fs.h   |  6 ++++--
>  fs/f2fs/inline.c |  5 +++--
>  3 files changed, 36 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index 47a5c806cf16..3e8e5ddf9dbc 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
>  static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>  				struct page *dentry_page,
>  				const struct f2fs_filename *fname,
> -				int *max_slots)
> +				int *max_slots,
> +				bool use_hash)
>  {
>  	struct f2fs_dentry_block *dentry_blk;
>  	struct f2fs_dentry_ptr d;
> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>  	dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
>  
>  	make_dentry_ptr_block(dir, &d, dentry_blk);
> -	return f2fs_find_target_dentry(&d, fname, max_slots);
> +	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
>  }
>  
>  static inline int f2fs_match_name(const struct inode *dir,
> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
>  }
>  
>  struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> -			const struct f2fs_filename *fname, int *max_slots)
> +			const struct f2fs_filename *fname, int *max_slots,
> +			bool use_hash)
>  {
>  	struct f2fs_dir_entry *de;
>  	unsigned long bit_pos = 0;
> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>  			continue;
>  		}
>  
> -		if (de->hash_code == fname->hash) {
> +		if (!use_hash || de->hash_code == fname->hash) {
>  			res = f2fs_match_name(d->inode, fname,
>  					      d->filename[bit_pos],
>  					      le16_to_cpu(de->name_len));
> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>  static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  					unsigned int level,
>  					const struct f2fs_filename *fname,
> -					struct page **res_page)
> +					struct page **res_page,
> +					bool use_hash)
>  {
>  	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
>  	unsigned int nbucket, nblock;
> -	unsigned int bidx, end_block;
> +	unsigned int bidx, end_block, bucket_no;
>  	struct page *dentry_page;
>  	struct f2fs_dir_entry *de = NULL;
>  	pgoff_t next_pgofs;
> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
>  	nblock = bucket_blocks(level);
>  
> +	bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
> +
> +start_find_bucket:
>  	bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
> -			       le32_to_cpu(fname->hash) % nbucket);
> +			       bucket_no);
>  	end_block = bidx + nblock;
>  
>  	while (bidx < end_block) {
> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  			}
>  		}
>  
> -		de = find_in_block(dir, dentry_page, fname, &max_slots);
> +		de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
>  		if (IS_ERR(de)) {
>  			*res_page = ERR_CAST(de);
>  			de = NULL;
> @@ -307,7 +313,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>  		bidx++;
>  	}
>  
> -	if (!de && room && F2FS_I(dir)->chash != fname->hash) {
> +	if (!use_hash && !de && ++bucket_no < nbucket)
> +		goto start_find_bucket;
> +
> +	if (use_hash && !de && room && F2FS_I(dir)->chash != fname->hash) {
>  		F2FS_I(dir)->chash = fname->hash;
>  		F2FS_I(dir)->clevel = level;
>  	}
> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>  	struct f2fs_dir_entry *de = NULL;
>  	unsigned int max_depth;
>  	unsigned int level;
> +	bool use_hash = true;
>  
>  	*res_page = NULL;
>  
> +start_find_entry:

This label is only used by #if IS_ENABLED(CONFIG_UNICODE).

>  	if (f2fs_has_inline_dentry(dir)) {
> -		de = f2fs_find_in_inline_dir(dir, fname, res_page);
> +		de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
>  		goto out;
>  	}
>  
> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>  	}
>  
>  	for (level = 0; level < max_depth; level++) {
> -		de = find_in_level(dir, level, fname, res_page);
> +		de = find_in_level(dir, level, fname, res_page, use_hash);
>  		if (de || IS_ERR(*res_page))
>  			break;
>  	}
> +
>  out:
> +#if IS_ENABLED(CONFIG_UNICODE)
> +	if (IS_CASEFOLDED(dir) && !de && use_hash) {
> +		use_hash = false;
> +		goto start_find_entry;
> +	}
> +#endif
>  	/* This is to increase the speed of f2fs_create */
>  	if (!de)
>  		F2FS_I(dir)->task = current;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f523dd302bf6..1afebb9c4061 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
>  			struct f2fs_filename *fname);
>  void f2fs_free_filename(struct f2fs_filename *fname);
>  struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> -			const struct f2fs_filename *fname, int *max_slots);
> +			const struct f2fs_filename *fname, int *max_slots,
> +			bool use_hash);
>  int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
>  			unsigned int start_pos, struct fscrypt_str *fstr);
>  void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
>  int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
>  struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>  					const struct f2fs_filename *fname,
> -					struct page **res_page);
> +					struct page **res_page,
> +					bool use_hash);
>  int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>  			struct page *ipage);
>  int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> index cbd2a0d34804..3e3c35d4c98b 100644
> --- a/fs/f2fs/inline.c
> +++ b/fs/f2fs/inline.c
> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>  
>  struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>  					const struct f2fs_filename *fname,
> -					struct page **res_page)
> +					struct page **res_page,
> +					bool use_hash)
>  {
>  	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
>  	struct f2fs_dir_entry *de;
> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>  	inline_dentry = inline_data_addr(dir, ipage);
>  
>  	make_dentry_ptr_inline(dir, &d, inline_dentry);
> -	de = f2fs_find_target_dentry(&d, fname, NULL);
> +	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
>  	unlock_page(ipage);
>  	if (IS_ERR(de)) {
>  		*res_page = ERR_CAST(de);
> -- 
> 2.47.1.613.gc27f4b7a9f-goog

  reply	other threads:[~2024-12-20 21:01 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-20 17:21 [f2fs-dev] [PATCH v2] f2fs: Introduce linear search for dentries Daniel Lee via Linux-f2fs-devel
2024-12-20 17:21 ` Daniel Lee
2024-12-20 21:00 ` Jaegeuk Kim via Linux-f2fs-devel [this message]
2024-12-20 21:00   ` Jaegeuk Kim
2024-12-23  8:26 ` [f2fs-dev] " Christoph Hellwig
2024-12-23  8:26   ` Christoph Hellwig
2024-12-23 16:57   ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel
2024-12-23 16:57     ` Jaegeuk Kim
2025-01-03  9:24     ` [f2fs-dev] " Christoph Hellwig
2025-01-03  9:24       ` Christoph Hellwig
2025-01-06 21:18       ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel
2025-01-06 21:18         ` Jaegeuk Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z2Xa40v15NAewl7b@google.com \
    --to=linux-f2fs-devel@lists.sourceforge.net \
    --cc=chullee@google.com \
    --cc=jaegeuk@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.