From: Daniel Lee via Linux-f2fs-devel <linux-f2fs-devel@lists.sourceforge.net>
To: Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net
Subject: [f2fs-dev] [PATCH v2] f2fs: Introduce linear search for dentries
Date: Fri, 20 Dec 2024 09:21:36 -0800 [thread overview]
Message-ID: <20241220172136.1028811-1-chullee@google.com> (raw)
This patch addresses an issue where some files in case-insensitive
directories become inaccessible due to changes in how the kernel function,
utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
("unicode: Don't special case ignorable code points").
F2FS uses these case-folded names to calculate hash values for locating
dentries and stores them on disk. Since utf8_casefold() can produce
different output across kernel versions, stored hash values and newly
calculated hash values may differ. This results in affected files no
longer being found via the hash-based lookup.
To resolve this, the patch introduces a linear search fallback.
If the initial hash-based search fails, F2FS will sequentially scan the
directory entries.
Fixes: 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
Signed-off-by: Daniel Lee <chullee@google.com>
---
v2:
- Only update chash if use_hash is true
fs/f2fs/dir.c | 40 +++++++++++++++++++++++++++++-----------
fs/f2fs/f2fs.h | 6 ++++--
fs/f2fs/inline.c | 5 +++--
3 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 47a5c806cf16..3e8e5ddf9dbc 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
const struct f2fs_filename *fname,
- int *max_slots)
+ int *max_slots,
+ bool use_hash)
{
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dentry_ptr d;
@@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- return f2fs_find_target_dentry(&d, fname, max_slots);
+ return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
}
static inline int f2fs_match_name(const struct inode *dir,
@@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
}
struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
- const struct f2fs_filename *fname, int *max_slots)
+ const struct f2fs_filename *fname, int *max_slots,
+ bool use_hash)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
@@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
continue;
}
- if (de->hash_code == fname->hash) {
+ if (!use_hash || de->hash_code == fname->hash) {
res = f2fs_match_name(d->inode, fname,
d->filename[bit_pos],
le16_to_cpu(de->name_len));
@@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
const struct f2fs_filename *fname,
- struct page **res_page)
+ struct page **res_page,
+ bool use_hash)
{
int s = GET_DENTRY_SLOTS(fname->disk_name.len);
unsigned int nbucket, nblock;
- unsigned int bidx, end_block;
+ unsigned int bidx, end_block, bucket_no;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
pgoff_t next_pgofs;
@@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
+ bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
+
+start_find_bucket:
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- le32_to_cpu(fname->hash) % nbucket);
+ bucket_no);
end_block = bidx + nblock;
while (bidx < end_block) {
@@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
- de = find_in_block(dir, dentry_page, fname, &max_slots);
+ de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
de = NULL;
@@ -307,7 +313,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
bidx++;
}
- if (!de && room && F2FS_I(dir)->chash != fname->hash) {
+ if (!use_hash && !de && ++bucket_no < nbucket)
+ goto start_find_bucket;
+
+ if (use_hash && !de && room && F2FS_I(dir)->chash != fname->hash) {
F2FS_I(dir)->chash = fname->hash;
F2FS_I(dir)->clevel = level;
}
@@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
+ bool use_hash = true;
*res_page = NULL;
+start_find_entry:
if (f2fs_has_inline_dentry(dir)) {
- de = f2fs_find_in_inline_dir(dir, fname, res_page);
+ de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
goto out;
}
@@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
}
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, fname, res_page);
+ de = find_in_level(dir, level, fname, res_page, use_hash);
if (de || IS_ERR(*res_page))
break;
}
+
out:
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (IS_CASEFOLDED(dir) && !de && use_hash) {
+ use_hash = false;
+ goto start_find_entry;
+ }
+#endif
/* This is to increase the speed of f2fs_create */
if (!de)
F2FS_I(dir)->task = current;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f523dd302bf6..1afebb9c4061 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
struct f2fs_filename *fname);
void f2fs_free_filename(struct f2fs_filename *fname);
struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
- const struct f2fs_filename *fname, int *max_slots);
+ const struct f2fs_filename *fname, int *max_slots,
+ bool use_hash);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
@@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
const struct f2fs_filename *fname,
- struct page **res_page);
+ struct page **res_page,
+ bool use_hash);
int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index cbd2a0d34804..3e3c35d4c98b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
const struct f2fs_filename *fname,
- struct page **res_page)
+ struct page **res_page,
+ bool use_hash)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_dir_entry *de;
@@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = f2fs_find_target_dentry(&d, fname, NULL);
+ de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
unlock_page(ipage);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
--
2.47.1.613.gc27f4b7a9f-goog
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
WARNING: multiple messages have this Message-ID (diff)
From: Daniel Lee <chullee@google.com>
To: Jaegeuk Kim <jaegeuk@kernel.org>, Chao Yu <chao@kernel.org>
Cc: linux-f2fs-devel@lists.sourceforge.net,
linux-kernel@vger.kernel.org, Daniel Lee <chullee@google.com>
Subject: [PATCH v2] f2fs: Introduce linear search for dentries
Date: Fri, 20 Dec 2024 09:21:36 -0800 [thread overview]
Message-ID: <20241220172136.1028811-1-chullee@google.com> (raw)
This patch addresses an issue where some files in case-insensitive
directories become inaccessible due to changes in how the kernel function,
utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
("unicode: Don't special case ignorable code points").
F2FS uses these case-folded names to calculate hash values for locating
dentries and stores them on disk. Since utf8_casefold() can produce
different output across kernel versions, stored hash values and newly
calculated hash values may differ. This results in affected files no
longer being found via the hash-based lookup.
To resolve this, the patch introduces a linear search fallback.
If the initial hash-based search fails, F2FS will sequentially scan the
directory entries.
Fixes: 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
Signed-off-by: Daniel Lee <chullee@google.com>
---
v2:
- Only update chash if use_hash is true
fs/f2fs/dir.c | 40 +++++++++++++++++++++++++++++-----------
fs/f2fs/f2fs.h | 6 ++++--
fs/f2fs/inline.c | 5 +++--
3 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 47a5c806cf16..3e8e5ddf9dbc 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
const struct f2fs_filename *fname,
- int *max_slots)
+ int *max_slots,
+ bool use_hash)
{
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dentry_ptr d;
@@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- return f2fs_find_target_dentry(&d, fname, max_slots);
+ return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
}
static inline int f2fs_match_name(const struct inode *dir,
@@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
}
struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
- const struct f2fs_filename *fname, int *max_slots)
+ const struct f2fs_filename *fname, int *max_slots,
+ bool use_hash)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
@@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
continue;
}
- if (de->hash_code == fname->hash) {
+ if (!use_hash || de->hash_code == fname->hash) {
res = f2fs_match_name(d->inode, fname,
d->filename[bit_pos],
le16_to_cpu(de->name_len));
@@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
const struct f2fs_filename *fname,
- struct page **res_page)
+ struct page **res_page,
+ bool use_hash)
{
int s = GET_DENTRY_SLOTS(fname->disk_name.len);
unsigned int nbucket, nblock;
- unsigned int bidx, end_block;
+ unsigned int bidx, end_block, bucket_no;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
pgoff_t next_pgofs;
@@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
+ bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
+
+start_find_bucket:
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- le32_to_cpu(fname->hash) % nbucket);
+ bucket_no);
end_block = bidx + nblock;
while (bidx < end_block) {
@@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
- de = find_in_block(dir, dentry_page, fname, &max_slots);
+ de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
de = NULL;
@@ -307,7 +313,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
bidx++;
}
- if (!de && room && F2FS_I(dir)->chash != fname->hash) {
+ if (!use_hash && !de && ++bucket_no < nbucket)
+ goto start_find_bucket;
+
+ if (use_hash && !de && room && F2FS_I(dir)->chash != fname->hash) {
F2FS_I(dir)->chash = fname->hash;
F2FS_I(dir)->clevel = level;
}
@@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
+ bool use_hash = true;
*res_page = NULL;
+start_find_entry:
if (f2fs_has_inline_dentry(dir)) {
- de = f2fs_find_in_inline_dir(dir, fname, res_page);
+ de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
goto out;
}
@@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
}
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, fname, res_page);
+ de = find_in_level(dir, level, fname, res_page, use_hash);
if (de || IS_ERR(*res_page))
break;
}
+
out:
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (IS_CASEFOLDED(dir) && !de && use_hash) {
+ use_hash = false;
+ goto start_find_entry;
+ }
+#endif
/* This is to increase the speed of f2fs_create */
if (!de)
F2FS_I(dir)->task = current;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f523dd302bf6..1afebb9c4061 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
struct f2fs_filename *fname);
void f2fs_free_filename(struct f2fs_filename *fname);
struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
- const struct f2fs_filename *fname, int *max_slots);
+ const struct f2fs_filename *fname, int *max_slots,
+ bool use_hash);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
@@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
const struct f2fs_filename *fname,
- struct page **res_page);
+ struct page **res_page,
+ bool use_hash);
int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index cbd2a0d34804..3e3c35d4c98b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
const struct f2fs_filename *fname,
- struct page **res_page)
+ struct page **res_page,
+ bool use_hash)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_dir_entry *de;
@@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = f2fs_find_target_dentry(&d, fname, NULL);
+ de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
unlock_page(ipage);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
--
2.47.1.613.gc27f4b7a9f-goog
next reply other threads:[~2024-12-20 17:44 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-20 17:21 Daniel Lee via Linux-f2fs-devel [this message]
2024-12-20 17:21 ` [PATCH v2] f2fs: Introduce linear search for dentries Daniel Lee
2024-12-20 21:00 ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel
2024-12-20 21:00 ` Jaegeuk Kim
2024-12-23 8:26 ` [f2fs-dev] " Christoph Hellwig
2024-12-23 8:26 ` Christoph Hellwig
2024-12-23 16:57 ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel
2024-12-23 16:57 ` Jaegeuk Kim
2025-01-03 9:24 ` [f2fs-dev] " Christoph Hellwig
2025-01-03 9:24 ` Christoph Hellwig
2025-01-06 21:18 ` [f2fs-dev] " Jaegeuk Kim via Linux-f2fs-devel
2025-01-06 21:18 ` Jaegeuk Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241220172136.1028811-1-chullee@google.com \
--to=linux-f2fs-devel@lists.sourceforge.net \
--cc=chao@kernel.org \
--cc=chullee@google.com \
--cc=jaegeuk@kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.