Re: [PATCH] f2fs: Introduce linear search for dentries

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Chao Yu <chao@kernel.org>
To: Daniel Lee <chullee@google.com>
Cc: Chao Yu <chao@kernel.org>, Jaegeuk Kim <jaegeuk@kernel.org>,
	linux-f2fs-devel@lists.sourceforge.net,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] f2fs: Introduce linear search for dentries
Date: Fri, 20 Dec 2024 21:25:22 +0800	[thread overview]
Message-ID: <9799a430-c1d9-4f04-9a8c-ad88fade8351@kernel.org> (raw)
In-Reply-To: <CALBjLoBxtMQjKQXdzyt8x75--mT9nJZnoEpDArL56fg8JRPdTg@mail.gmail.com>

On 2024/12/20 12:59, Daniel Lee wrote:
> On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@kernel.org> wrote:
>>
>> Hi Daniel,
>>
>> On 2024/12/17 15:55, Daniel Lee wrote:
>>> This patch addresses an issue where some files in case-insensitive
>>> directories become inaccessible due to changes in how the kernel function,
>>> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
>>> ("unicode: Don't special case ignorable code points").
>>>
>>> F2FS uses these case-folded names to calculate hash values for locating
>>> dentries and stores them on disk. Since utf8_casefold() can produce
>>> different output across kernel versions, stored hash values and newly
>>> calculated hash values may differ. This results in affected files no
>>> longer being found via the hash-based lookup.
>>>
>>> To resolve this, the patch introduces a linear search fallback.
>>> If the initial hash-based search fails, F2FS will sequentially scan the
>>> directory entries.
>>>
>>
>> Need a fixes line?
> 
> Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes:
> 
>>
>>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
>>> Signed-off-by: Daniel Lee <chullee@google.com>
>>> ---
>>>    fs/f2fs/dir.c    | 38 ++++++++++++++++++++++++++++----------
>>>    fs/f2fs/f2fs.h   |  6 ++++--
>>>    fs/f2fs/inline.c |  5 +++--
>>>    3 files changed, 35 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>>> index 47a5c806cf16..a85d19b4e178 100644
>>> --- a/fs/f2fs/dir.c
>>> +++ b/fs/f2fs/dir.c
>>> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
>>>    static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>>>                                struct page *dentry_page,
>>>                                const struct f2fs_filename *fname,
>>> -                             int *max_slots)
>>> +                             int *max_slots,
>>> +                             bool use_hash)
>>>    {
>>>        struct f2fs_dentry_block *dentry_blk;
>>>        struct f2fs_dentry_ptr d;
>>> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
>>>        dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
>>>
>>>        make_dentry_ptr_block(dir, &d, dentry_blk);
>>> -     return f2fs_find_target_dentry(&d, fname, max_slots);
>>> +     return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
>>>    }
>>>
>>>    static inline int f2fs_match_name(const struct inode *dir,
>>> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
>>>    }
>>>
>>>    struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>> -                     const struct f2fs_filename *fname, int *max_slots)
>>> +                     const struct f2fs_filename *fname, int *max_slots,
>>> +                     bool use_hash)
>>>    {
>>>        struct f2fs_dir_entry *de;
>>>        unsigned long bit_pos = 0;
>>> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>>                        continue;
>>>                }
>>>
>>> -             if (de->hash_code == fname->hash) {
>>> +             if (!use_hash || de->hash_code == fname->hash) {
>>>                        res = f2fs_match_name(d->inode, fname,
>>>                                              d->filename[bit_pos],
>>>                                              le16_to_cpu(de->name_len));
>>> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>>    static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>                                        unsigned int level,
>>>                                        const struct f2fs_filename *fname,
>>> -                                     struct page **res_page)
>>> +                                     struct page **res_page,
>>> +                                     bool use_hash)
>>>    {
>>>        int s = GET_DENTRY_SLOTS(fname->disk_name.len);
>>>        unsigned int nbucket, nblock;
>>> -     unsigned int bidx, end_block;
>>> +     unsigned int bidx, end_block, bucket_no;
>>>        struct page *dentry_page;
>>>        struct f2fs_dir_entry *de = NULL;
>>>        pgoff_t next_pgofs;
>>> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>        nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
>>>        nblock = bucket_blocks(level);
>>>
>>> +     bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
>>> +
>>> +start_find_bucket:
>>>        bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
>>> -                            le32_to_cpu(fname->hash) % nbucket);
>>> +                            bucket_no);
>>>        end_block = bidx + nblock;
>>>
>>>        while (bidx < end_block) {
>>> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>                        }
>>>                }
>>>
>>> -             de = find_in_block(dir, dentry_page, fname, &max_slots);
>>> +             de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
>>>                if (IS_ERR(de)) {
>>>                        *res_page = ERR_CAST(de);
>>>                        de = NULL;
>>> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
>>>                bidx++;
>>>        }
>>>
>>> +     if (!use_hash && !de && ++bucket_no < nbucket)
>>> +             goto start_find_bucket;
>>> +
>>>        if (!de && room && F2FS_I(dir)->chash != fname->hash) {
>>
>> Do we need to avoid accessing or assigning hash if use_hash is false?
>>
> 
> Is it still worth keeping the hash for the creation if both hash-based
> and linear searches failed?

It needs to be kept for hash-based searching failure? since it was added to
enhance performance of file creation:
- open(..., O_CREAT)
  - f2fs_lookup
  : didn't find target dirent, then, record last chash & clevel
  - f2fs_create
  : create dirent in clevel bucket once chash matches

Thanks,

> 
>> Thanks,
>>
>>>                F2FS_I(dir)->chash = fname->hash;
>>>                F2FS_I(dir)->clevel = level;
>>> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>>>        struct f2fs_dir_entry *de = NULL;
>>>        unsigned int max_depth;
>>>        unsigned int level;
>>> +     bool use_hash = true;
>>>
>>>        *res_page = NULL;
>>>
>>> +start_find_entry:
>>>        if (f2fs_has_inline_dentry(dir)) {
>>> -             de = f2fs_find_in_inline_dir(dir, fname, res_page);
>>> +             de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
>>>                goto out;
>>>        }
>>>
>>> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
>>>        }
>>>
>>>        for (level = 0; level < max_depth; level++) {
>>> -             de = find_in_level(dir, level, fname, res_page);
>>> +             de = find_in_level(dir, level, fname, res_page, use_hash);
>>>                if (de || IS_ERR(*res_page))
>>>                        break;
>>>        }
>>> +
>>>    out:
>>> +#if IS_ENABLED(CONFIG_UNICODE)
>>> +     if (IS_CASEFOLDED(dir) && !de && use_hash) {
>>> +             use_hash = false;
>>> +             goto start_find_entry;
>>> +     }
>>> +#endif
>>>        /* This is to increase the speed of f2fs_create */
>>>        if (!de)
>>>                F2FS_I(dir)->task = current;
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index f523dd302bf6..1afebb9c4061 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
>>>                        struct f2fs_filename *fname);
>>>    void f2fs_free_filename(struct f2fs_filename *fname);
>>>    struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
>>> -                     const struct f2fs_filename *fname, int *max_slots);
>>> +                     const struct f2fs_filename *fname, int *max_slots,
>>> +                     bool use_hash);
>>>    int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
>>>                        unsigned int start_pos, struct fscrypt_str *fstr);
>>>    void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
>>> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
>>>    int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
>>>    struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>>>                                        const struct f2fs_filename *fname,
>>> -                                     struct page **res_page);
>>> +                                     struct page **res_page,
>>> +                                     bool use_hash);
>>>    int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>>>                        struct page *ipage);
>>>    int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>> index cbd2a0d34804..3e3c35d4c98b 100644
>>> --- a/fs/f2fs/inline.c
>>> +++ b/fs/f2fs/inline.c
>>> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
>>>
>>>    struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>>>                                        const struct f2fs_filename *fname,
>>> -                                     struct page **res_page)
>>> +                                     struct page **res_page,
>>> +                                     bool use_hash)
>>>    {
>>>        struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
>>>        struct f2fs_dir_entry *de;
>>> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
>>>        inline_dentry = inline_data_addr(dir, ipage);
>>>
>>>        make_dentry_ptr_inline(dir, &d, inline_dentry);
>>> -     de = f2fs_find_target_dentry(&d, fname, NULL);
>>> +     de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
>>>        unlock_page(ipage);
>>>        if (IS_ERR(de)) {
>>>                *res_page = ERR_CAST(de);
>>

next prev parent reply	other threads:[~2024-12-20 13:25 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-17  7:55 [PATCH] f2fs: Introduce linear search for dentries Daniel Lee
2024-12-19 13:29 ` Chao Yu
2024-12-20  4:59   ` Daniel Lee
2024-12-20 13:25     ` Chao Yu [this message]
2024-12-20 17:04       ` Daniel Lee
  -- strict thread matches above, loose matches on Subject: below --
2024-12-17  7:47 Daniel Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9799a430-c1d9-4f04-9a8c-ad88fade8351@kernel.org \
    --to=chao@kernel.org \
    --cc=chullee@google.com \
    --cc=jaegeuk@kernel.org \
    --cc=linux-f2fs-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox