linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] f2fs: improve the performance of f2fs_lookup
@ 2025-07-03  8:21 Yuwen Chen
  2025-07-03  8:56 ` Christoph Hellwig
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Yuwen Chen @ 2025-07-03  8:21 UTC (permalink / raw)
  To: tytso, adilger.kernel, jaegeuk, chao, viro, brauner
  Cc: linux-ext4, linux-kernel, linux-f2fs-devel, linux-fsdevel,
	Yuwen Chen

On the Android system, the file creation operation will call
the f2fs_lookup function. When there are too many files in a
directory, the generic_ci_match operation will be called
repeatedly in large quantities. In extreme cases, the file
creation speed will drop to three times per second.

Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
---
 fs/ext4/namei.c    |  2 +-
 fs/f2fs/dir.c      | 24 +++++++++++++++++-------
 fs/f2fs/f2fs.h     |  3 ++-
 fs/f2fs/inline.c   |  3 ++-
 fs/libfs.c         | 32 +++++++++++++++++++++++++++++---
 include/linux/fs.h |  8 +++++++-
 6 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a178ac2294895..f235693bd71aa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent,
 
 		return generic_ci_match(parent, fname->usr_fname,
 					&fname->cf_name, de->name,
-					de->name_len) > 0;
+					de->name_len, NULL) > 0;
 	}
 #endif
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c36b3b22bfffd..ee0cbeb80debd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -176,6 +176,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 				struct folio *dentry_folio,
 				const struct f2fs_filename *fname,
 				int *max_slots,
+				struct decrypted_name_prealloc *prealloc,
 				bool use_hash)
 {
 	struct f2fs_dentry_block *dentry_blk;
@@ -184,12 +185,13 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 	dentry_blk = folio_address(dentry_folio);
 
 	make_dentry_ptr_block(dir, &d, dentry_blk);
-	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
+	return f2fs_find_target_dentry(&d, fname, max_slots, prealloc, use_hash);
 }
 
 static inline int f2fs_match_name(const struct inode *dir,
 				   const struct f2fs_filename *fname,
-				   const u8 *de_name, u32 de_name_len)
+				   const u8 *de_name, u32 de_name_len,
+				   struct decrypted_name_prealloc *prealloc)
 {
 	struct fscrypt_name f;
 
@@ -197,7 +199,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 	if (fname->cf_name.name)
 		return generic_ci_match(dir, fname->usr_fname,
 					&fname->cf_name,
-					de_name, de_name_len);
+					de_name, de_name_len, prealloc);
 
 #endif
 	f.usr_fname = fname->usr_fname;
@@ -210,6 +212,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			const struct f2fs_filename *fname, int *max_slots,
+			struct decrypted_name_prealloc *prealloc,
 			bool use_hash)
 {
 	struct f2fs_dir_entry *de;
@@ -236,7 +239,8 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 		if (!use_hash || de->hash_code == fname->hash) {
 			res = f2fs_match_name(d->inode, fname,
 					      d->filename[bit_pos],
-					      le16_to_cpu(de->name_len));
+					      le16_to_cpu(de->name_len),
+					      prealloc);
 			if (res < 0)
 				return ERR_PTR(res);
 			if (res)
@@ -261,6 +265,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 					unsigned int level,
 					const struct f2fs_filename *fname,
 					struct folio **res_folio,
+					struct decrypted_name_prealloc *prealloc,
 					bool use_hash)
 {
 	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
@@ -296,7 +301,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			}
 		}
 
-		de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash);
+		de = find_in_block(dir, dentry_folio, fname, &max_slots, prealloc,
+				   use_hash);
 		if (IS_ERR(de)) {
 			*res_folio = ERR_CAST(de);
 			de = NULL;
@@ -336,6 +342,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	unsigned int max_depth;
 	unsigned int level;
 	bool use_hash = true;
+	struct decrypted_name_prealloc prealloc = {0};
 
 	*res_folio = NULL;
 
@@ -343,7 +350,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 start_find_entry:
 #endif
 	if (f2fs_has_inline_dentry(dir)) {
-		de = f2fs_find_in_inline_dir(dir, fname, res_folio, use_hash);
+		de = f2fs_find_in_inline_dir(dir, fname, res_folio, &prealloc,
+					     use_hash);
 		goto out;
 	}
 
@@ -359,7 +367,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	}
 
 	for (level = 0; level < max_depth; level++) {
-		de = find_in_level(dir, level, fname, res_folio, use_hash);
+		de = find_in_level(dir, level, fname, res_folio, &prealloc,
+				   use_hash);
 		if (de || IS_ERR(*res_folio))
 			break;
 	}
@@ -372,6 +381,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 		goto start_find_entry;
 	}
 #endif
+	kfree(prealloc.name);
 	/* This is to increase the speed of f2fs_create */
 	if (!de)
 		F2FS_I(dir)->task = current;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9333a22b9a01e..dfbd2215310fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3673,6 +3673,7 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
 void f2fs_free_filename(struct f2fs_filename *fname);
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			const struct f2fs_filename *fname, int *max_slots,
+			struct decrypted_name_prealloc *prealloc,
 			bool use_hash);
 int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 			unsigned int start_pos, struct fscrypt_str *fstr);
@@ -4316,7 +4317,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
 int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio);
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 		const struct f2fs_filename *fname, struct folio **res_folio,
-		bool use_hash);
+		struct decrypted_name_prealloc *prealloc, bool use_hash);
 int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
 			struct folio *ifolio);
 int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 901c630685ced..d02ff6c26d70a 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -353,6 +353,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio)
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 					const struct f2fs_filename *fname,
 					struct folio **res_folio,
+					struct decrypted_name_prealloc *prealloc,
 					bool use_hash)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
@@ -370,7 +371,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 	inline_dentry = inline_data_addr(dir, ifolio);
 
 	make_dentry_ptr_inline(dir, &d, inline_dentry);
-	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
+	de = f2fs_find_target_dentry(&d, fname, NULL, prealloc, use_hash);
 	folio_unlock(ifolio);
 	if (IS_ERR(de)) {
 		*res_folio = ERR_CAST(de);
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a81..cab3d86483835 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1863,6 +1863,26 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 #endif
 };
 
+#define DECRYPTED_NAME_PREALLOC_MIN_LEN 64
+static inline char *decrypted_name_prealloc_resize(
+		struct decrypted_name_prealloc *prealloc,
+		size_t wantlen)
+{
+	char *retbuf = NULL;
+
+	if (prealloc->name && wantlen >= prealloc->namelen)
+		return prealloc->name;
+
+	retbuf = kmalloc(wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN, GFP_KERNEL);
+	if (!retbuf)
+		return NULL;
+
+	kfree(prealloc->name);
+	prealloc->name = retbuf;
+	prealloc->namelen = wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN;
+	return retbuf;
+}
+
 /**
  * generic_ci_match() - Match a name (case-insensitively) with a dirent.
  * This is a filesystem helper for comparison with directory entries.
@@ -1884,7 +1904,8 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 int generic_ci_match(const struct inode *parent,
 		     const struct qstr *name,
 		     const struct qstr *folded_name,
-		     const u8 *de_name, u32 de_name_len)
+		     const u8 *de_name, u32 de_name_len,
+		     struct decrypted_name_prealloc *prealloc)
 {
 	const struct super_block *sb = parent->i_sb;
 	const struct unicode_map *um = sb->s_encoding;
@@ -1899,7 +1920,11 @@ int generic_ci_match(const struct inode *parent,
 		if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
 			return -EINVAL;
 
-		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		if (!prealloc)
+			decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		else
+			decrypted_name.name = decrypted_name_prealloc_resize(
+					prealloc, de_name_len);
 		if (!decrypted_name.name)
 			return -ENOMEM;
 		res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
@@ -1928,7 +1953,8 @@ int generic_ci_match(const struct inode *parent,
 		res = utf8_strncasecmp(um, name, &dirent);
 
 out:
-	kfree(decrypted_name.name);
+	if (!prealloc)
+		kfree(decrypted_name.name);
 	if (res < 0 && sb_has_strict_encoding(sb)) {
 		pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
 		return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ec77da65f144..65307c8c11485 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3651,10 +3651,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_check_addressable(unsigned, u64);
 
 extern void generic_set_sb_d_ops(struct super_block *sb);
+
+struct decrypted_name_prealloc {
+	char *name;
+	size_t namelen;
+};
 extern int generic_ci_match(const struct inode *parent,
 			    const struct qstr *name,
 			    const struct qstr *folded_name,
-			    const u8 *de_name, u32 de_name_len);
+			    const u8 *de_name, u32 de_name_len,
+			    struct decrypted_name_prealloc *prealloc);
 
 #if IS_ENABLED(CONFIG_UNICODE)
 int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] f2fs: improve the performance of f2fs_lookup
  2025-07-03  8:21 [PATCH] f2fs: improve the performance of f2fs_lookup Yuwen Chen
@ 2025-07-03  8:56 ` Christoph Hellwig
  2025-07-04  2:43   ` [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
  2025-07-04  2:44   ` [PATCH v3 2/2] f2fs: improve the performance of f2fs_lookup Yuwen Chen
  2025-07-03  9:54 ` [PATCH v2] " Yuwen Chen
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-07-03  8:56 UTC (permalink / raw)
  To: Yuwen Chen
  Cc: tytso, adilger.kernel, jaegeuk, chao, viro, brauner, linux-ext4,
	linux-kernel, linux-f2fs-devel, linux-fsdevel

On Thu, Jul 03, 2025 at 04:21:30PM +0800, Yuwen Chen wrote:
> On the Android system, the file creation operation will call
> the f2fs_lookup function. When there are too many files in a
> directory, the generic_ci_match operation will be called
> repeatedly in large quantities. In extreme cases, the file
> creation speed will drop to three times per second.

This files to explain what you are changing in detail, and why
(except for the very highlevel problem statement here).

> 
> Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
> ---
>  fs/ext4/namei.c    |  2 +-
>  fs/f2fs/dir.c      | 24 +++++++++++++++++-------
>  fs/f2fs/f2fs.h     |  3 ++-
>  fs/f2fs/inline.c   |  3 ++-
>  fs/libfs.c         | 32 +++++++++++++++++++++++++++++---
>  include/linux/fs.h |  8 +++++++-

Also please split generic infrastructure changes from f2fs ones.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] f2fs: improve the performance of f2fs_lookup
  2025-07-03  8:21 [PATCH] f2fs: improve the performance of f2fs_lookup Yuwen Chen
  2025-07-03  8:56 ` Christoph Hellwig
@ 2025-07-03  9:54 ` Yuwen Chen
  2025-07-08  6:41 ` [PATCH v4 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
  2025-07-09  2:42 ` [PATCH] f2fs: improve the performance of f2fs_lookup kernel test robot
  3 siblings, 0 replies; 12+ messages in thread
From: Yuwen Chen @ 2025-07-03  9:54 UTC (permalink / raw)
  To: ywen.chen
  Cc: adilger.kernel, brauner, chao, jaegeuk, linux-ext4,
	linux-f2fs-devel, linux-fsdevel, linux-kernel, tytso, viro

On the Android system, the file creation operation will call
the f2fs_lookup function. When there are too many files in a
directory, the generic_ci_match operation will be called
repeatedly in large quantities. In extreme cases, the file
creation speed will drop to three times per second.

Use the following program to conduct a file-creation test in
the private program directory(/data/media/0/Android/data/*)
of Android.

int main(int argc, char **argv)
{
    size_t fcnt = 0;
    char path[PATH_MAX];
    char buf[4096] = {0};
    int i, fd;

    if (argc < 2)
        return - EINVAL;

    fcnt = atoi(argv[1]);
    for (i = 0; i < fcnt; i++)
    {
        snprintf(path, sizeof(path), "./%d", i);
        fd = open(path, O_RDWR | O_CREAT, 0600);
        if (fd < 0)
            return - 1;
        write(fd, buf, sizeof(buf));
        close(fd);
    }
    return 0;
}

The test platform is Snapdragon 8s Gen4, with a kernel version
of v6.16 and a userdebug version.

Before this submission was merged, when creating 2000 files,
the performance test results are as follows:
$ time /data/file_creater 2000
0m14.83s real     0m00.00s user     0m14.30s system
0m15.61s real     0m00.00s user     0m15.04s system
0m14.72s real     0m00.01s user     0m14.18s system

After this submission was merged, the performance is as follows:
$ time /data/file_creater 2000
0m08.17s real     0m00.00s user     0m07.86s system
0m08.16s real     0m00.01s user     0m07.86s system
0m08.15s real     0m00.00s user     0m07.86s system

It was observed through perf that the generic_ci_match function
was called a large number of times, which led to most of the
time being spent on memory allocation and release. Due to a
flush_dcache operation in the implementation of cts_cbc_decrypt,
this memory cannot be allocated on the stack.

Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
---
 fs/ext4/namei.c    |  2 +-
 fs/f2fs/dir.c      | 24 +++++++++++++++++-------
 fs/f2fs/f2fs.h     |  3 ++-
 fs/f2fs/inline.c   |  3 ++-
 fs/libfs.c         | 32 +++++++++++++++++++++++++++++---
 include/linux/fs.h |  8 +++++++-
 6 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a178ac2294895..f235693bd71aa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent,
 
 		return generic_ci_match(parent, fname->usr_fname,
 					&fname->cf_name, de->name,
-					de->name_len) > 0;
+					de->name_len, NULL) > 0;
 	}
 #endif
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c36b3b22bfffd..ee0cbeb80debd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -176,6 +176,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 				struct folio *dentry_folio,
 				const struct f2fs_filename *fname,
 				int *max_slots,
+				struct decrypted_name_prealloc *prealloc,
 				bool use_hash)
 {
 	struct f2fs_dentry_block *dentry_blk;
@@ -184,12 +185,13 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 	dentry_blk = folio_address(dentry_folio);
 
 	make_dentry_ptr_block(dir, &d, dentry_blk);
-	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
+	return f2fs_find_target_dentry(&d, fname, max_slots, prealloc, use_hash);
 }
 
 static inline int f2fs_match_name(const struct inode *dir,
 				   const struct f2fs_filename *fname,
-				   const u8 *de_name, u32 de_name_len)
+				   const u8 *de_name, u32 de_name_len,
+				   struct decrypted_name_prealloc *prealloc)
 {
 	struct fscrypt_name f;
 
@@ -197,7 +199,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 	if (fname->cf_name.name)
 		return generic_ci_match(dir, fname->usr_fname,
 					&fname->cf_name,
-					de_name, de_name_len);
+					de_name, de_name_len, prealloc);
 
 #endif
 	f.usr_fname = fname->usr_fname;
@@ -210,6 +212,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			const struct f2fs_filename *fname, int *max_slots,
+			struct decrypted_name_prealloc *prealloc,
 			bool use_hash)
 {
 	struct f2fs_dir_entry *de;
@@ -236,7 +239,8 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 		if (!use_hash || de->hash_code == fname->hash) {
 			res = f2fs_match_name(d->inode, fname,
 					      d->filename[bit_pos],
-					      le16_to_cpu(de->name_len));
+					      le16_to_cpu(de->name_len),
+					      prealloc);
 			if (res < 0)
 				return ERR_PTR(res);
 			if (res)
@@ -261,6 +265,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 					unsigned int level,
 					const struct f2fs_filename *fname,
 					struct folio **res_folio,
+					struct decrypted_name_prealloc *prealloc,
 					bool use_hash)
 {
 	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
@@ -296,7 +301,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			}
 		}
 
-		de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash);
+		de = find_in_block(dir, dentry_folio, fname, &max_slots, prealloc,
+				   use_hash);
 		if (IS_ERR(de)) {
 			*res_folio = ERR_CAST(de);
 			de = NULL;
@@ -336,6 +342,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	unsigned int max_depth;
 	unsigned int level;
 	bool use_hash = true;
+	struct decrypted_name_prealloc prealloc = {0};
 
 	*res_folio = NULL;
 
@@ -343,7 +350,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 start_find_entry:
 #endif
 	if (f2fs_has_inline_dentry(dir)) {
-		de = f2fs_find_in_inline_dir(dir, fname, res_folio, use_hash);
+		de = f2fs_find_in_inline_dir(dir, fname, res_folio, &prealloc,
+					     use_hash);
 		goto out;
 	}
 
@@ -359,7 +367,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	}
 
 	for (level = 0; level < max_depth; level++) {
-		de = find_in_level(dir, level, fname, res_folio, use_hash);
+		de = find_in_level(dir, level, fname, res_folio, &prealloc,
+				   use_hash);
 		if (de || IS_ERR(*res_folio))
 			break;
 	}
@@ -372,6 +381,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 		goto start_find_entry;
 	}
 #endif
+	kfree(prealloc.name);
 	/* This is to increase the speed of f2fs_create */
 	if (!de)
 		F2FS_I(dir)->task = current;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9333a22b9a01e..dfbd2215310fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3673,6 +3673,7 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
 void f2fs_free_filename(struct f2fs_filename *fname);
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			const struct f2fs_filename *fname, int *max_slots,
+			struct decrypted_name_prealloc *prealloc,
 			bool use_hash);
 int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 			unsigned int start_pos, struct fscrypt_str *fstr);
@@ -4316,7 +4317,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
 int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio);
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 		const struct f2fs_filename *fname, struct folio **res_folio,
-		bool use_hash);
+		struct decrypted_name_prealloc *prealloc, bool use_hash);
 int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
 			struct folio *ifolio);
 int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 901c630685ced..d02ff6c26d70a 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -353,6 +353,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio)
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 					const struct f2fs_filename *fname,
 					struct folio **res_folio,
+					struct decrypted_name_prealloc *prealloc,
 					bool use_hash)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
@@ -370,7 +371,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 	inline_dentry = inline_data_addr(dir, ifolio);
 
 	make_dentry_ptr_inline(dir, &d, inline_dentry);
-	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
+	de = f2fs_find_target_dentry(&d, fname, NULL, prealloc, use_hash);
 	folio_unlock(ifolio);
 	if (IS_ERR(de)) {
 		*res_folio = ERR_CAST(de);
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a81..cab3d86483835 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1863,6 +1863,26 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 #endif
 };
 
+#define DECRYPTED_NAME_PREALLOC_MIN_LEN 64
+static inline char *decrypted_name_prealloc_resize(
+		struct decrypted_name_prealloc *prealloc,
+		size_t wantlen)
+{
+	char *retbuf = NULL;
+
+	if (prealloc->name && wantlen >= prealloc->namelen)
+		return prealloc->name;
+
+	retbuf = kmalloc(wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN, GFP_KERNEL);
+	if (!retbuf)
+		return NULL;
+
+	kfree(prealloc->name);
+	prealloc->name = retbuf;
+	prealloc->namelen = wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN;
+	return retbuf;
+}
+
 /**
  * generic_ci_match() - Match a name (case-insensitively) with a dirent.
  * This is a filesystem helper for comparison with directory entries.
@@ -1884,7 +1904,8 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 int generic_ci_match(const struct inode *parent,
 		     const struct qstr *name,
 		     const struct qstr *folded_name,
-		     const u8 *de_name, u32 de_name_len)
+		     const u8 *de_name, u32 de_name_len,
+		     struct decrypted_name_prealloc *prealloc)
 {
 	const struct super_block *sb = parent->i_sb;
 	const struct unicode_map *um = sb->s_encoding;
@@ -1899,7 +1920,11 @@ int generic_ci_match(const struct inode *parent,
 		if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
 			return -EINVAL;
 
-		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		if (!prealloc)
+			decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		else
+			decrypted_name.name = decrypted_name_prealloc_resize(
+					prealloc, de_name_len);
 		if (!decrypted_name.name)
 			return -ENOMEM;
 		res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
@@ -1928,7 +1953,8 @@ int generic_ci_match(const struct inode *parent,
 		res = utf8_strncasecmp(um, name, &dirent);
 
 out:
-	kfree(decrypted_name.name);
+	if (!prealloc)
+		kfree(decrypted_name.name);
 	if (res < 0 && sb_has_strict_encoding(sb)) {
 		pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
 		return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ec77da65f144..65307c8c11485 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3651,10 +3651,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_check_addressable(unsigned, u64);
 
 extern void generic_set_sb_d_ops(struct super_block *sb);
+
+struct decrypted_name_prealloc {
+	char *name;
+	size_t namelen;
+};
 extern int generic_ci_match(const struct inode *parent,
 			    const struct qstr *name,
 			    const struct qstr *folded_name,
-			    const u8 *de_name, u32 de_name_len);
+			    const u8 *de_name, u32 de_name_len,
+			    struct decrypted_name_prealloc *prealloc);
 
 #if IS_ENABLED(CONFIG_UNICODE)
 int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
  2025-07-03  8:56 ` Christoph Hellwig
@ 2025-07-04  2:43   ` Yuwen Chen
  2025-07-04  6:02     ` Eric Biggers
  2025-07-04  2:44   ` [PATCH v3 2/2] f2fs: improve the performance of f2fs_lookup Yuwen Chen
  1 sibling, 1 reply; 12+ messages in thread
From: Yuwen Chen @ 2025-07-04  2:43 UTC (permalink / raw)
  To: hch
  Cc: adilger.kernel, brauner, chao, jaegeuk, linux-ext4,
	linux-f2fs-devel, linux-fsdevel, linux-kernel, tytso, viro,
	ywen.chen

During path traversal, the generic_ci_match function may be called
multiple times. The number of memory allocations and releases
in it accounts for a relatively high proportion in the flamegraph.
This patch significantly reduces the number of memory allocations
in generic_ci_match through pre - allocation.

Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
---
 fs/ext4/namei.c    |  2 +-
 fs/f2fs/dir.c      |  2 +-
 fs/libfs.c         | 33 ++++++++++++++++++++++++++++++---
 include/linux/fs.h |  8 +++++++-
 4 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a178ac2294895..f235693bd71aa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent,
 
 		return generic_ci_match(parent, fname->usr_fname,
 					&fname->cf_name, de->name,
-					de->name_len) > 0;
+					de->name_len, NULL) > 0;
 	}
 #endif
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c36b3b22bfffd..4c6611fbd9574 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -197,7 +197,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 	if (fname->cf_name.name)
 		return generic_ci_match(dir, fname->usr_fname,
 					&fname->cf_name,
-					de_name, de_name_len);
+					de_name, de_name_len, NULL);
 
 #endif
 	f.usr_fname = fname->usr_fname;
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a81..d2a6b2a4fe11c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1863,6 +1863,26 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 #endif
 };
 
+#define DECRYPTED_NAME_PREALLOC_MIN_LEN 64
+static inline char *decrypted_name_prealloc_resize(
+		struct decrypted_name_prealloc *prealloc,
+		size_t wantlen)
+{
+	char *retbuf = NULL;
+
+	if (prealloc->name && wantlen >= prealloc->namelen)
+		return prealloc->name;
+
+	retbuf = kmalloc(wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN, GFP_KERNEL);
+	if (!retbuf)
+		return NULL;
+
+	kfree(prealloc->name);
+	prealloc->name = retbuf;
+	prealloc->namelen = wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN;
+	return retbuf;
+}
+
 /**
  * generic_ci_match() - Match a name (case-insensitively) with a dirent.
  * This is a filesystem helper for comparison with directory entries.
@@ -1873,6 +1893,7 @@ static const struct dentry_operations generic_ci_dentry_ops = {
  * @folded_name: Optional pre-folded name under lookup
  * @de_name: Dirent name.
  * @de_name_len: dirent name length.
+ * @prealloc: decrypted name memory buffer
  *
  * Test whether a case-insensitive directory entry matches the filename
  * being searched.  If @folded_name is provided, it is used instead of
@@ -1884,7 +1905,8 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 int generic_ci_match(const struct inode *parent,
 		     const struct qstr *name,
 		     const struct qstr *folded_name,
-		     const u8 *de_name, u32 de_name_len)
+		     const u8 *de_name, u32 de_name_len,
+		     struct decrypted_name_prealloc *prealloc)
 {
 	const struct super_block *sb = parent->i_sb;
 	const struct unicode_map *um = sb->s_encoding;
@@ -1899,7 +1921,11 @@ int generic_ci_match(const struct inode *parent,
 		if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
 			return -EINVAL;
 
-		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		if (!prealloc)
+			decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		else
+			decrypted_name.name = decrypted_name_prealloc_resize(
+					prealloc, de_name_len);
 		if (!decrypted_name.name)
 			return -ENOMEM;
 		res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
@@ -1928,7 +1954,8 @@ int generic_ci_match(const struct inode *parent,
 		res = utf8_strncasecmp(um, name, &dirent);
 
 out:
-	kfree(decrypted_name.name);
+	if (!prealloc)
+		kfree(decrypted_name.name);
 	if (res < 0 && sb_has_strict_encoding(sb)) {
 		pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
 		return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ec77da65f144..65307c8c11485 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3651,10 +3651,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_check_addressable(unsigned, u64);
 
 extern void generic_set_sb_d_ops(struct super_block *sb);
+
+struct decrypted_name_prealloc {
+	char *name;
+	size_t namelen;
+};
 extern int generic_ci_match(const struct inode *parent,
 			    const struct qstr *name,
 			    const struct qstr *folded_name,
-			    const u8 *de_name, u32 de_name_len);
+			    const u8 *de_name, u32 de_name_len,
+			    struct decrypted_name_prealloc *prealloc);
 
 #if IS_ENABLED(CONFIG_UNICODE)
 int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/2] f2fs: improve the performance of f2fs_lookup
  2025-07-03  8:56 ` Christoph Hellwig
  2025-07-04  2:43   ` [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
@ 2025-07-04  2:44   ` Yuwen Chen
  1 sibling, 0 replies; 12+ messages in thread
From: Yuwen Chen @ 2025-07-04  2:44 UTC (permalink / raw)
  To: hch
  Cc: adilger.kernel, brauner, chao, jaegeuk, linux-ext4,
	linux-f2fs-devel, linux-fsdevel, linux-kernel, tytso, viro,
	ywen.chen

On the Android system, the file creation operation will call
the f2fs_lookup function. When there are too many files in a
directory, the generic_ci_match operation will be called
repeatedly in large quantities. In extreme cases, the file
creation speed will drop to three times per second.

Use the following program to conduct a file-creation test in
the private program directory(/data/media/0/Android/data/*)
of Android.

int main(int argc, char **argv)
{
    size_t fcnt = 0;
    char path[PATH_MAX];
    char buf[4096] = {0};
    int i, fd;

    if (argc < 2)
        return - EINVAL;

    fcnt = atoi(argv[1]);
    for (i = 0; i < fcnt; i++)
    {
        snprintf(path, sizeof(path), "./%d", i);
        fd = open(path, O_RDWR | O_CREAT, 0600);
        if (fd < 0)
            return - 1;
        write(fd, buf, sizeof(buf));
        close(fd);
    }
    return 0;
}

The test platform is Snapdragon 8s Gen4, with a kernel version
of v6.6 and a userdebug version.

Before this submission was merged, when creating 2000 files,
the performance test results are as follows:
$ time /data/file_creater 2000
0m14.83s real     0m00.00s user     0m14.30s system
0m15.61s real     0m00.00s user     0m15.04s system
0m14.72s real     0m00.01s user     0m14.18s system

After this submission was merged, the performance is as follows:
$ time /data/file_creater 2000
0m08.17s real     0m00.00s user     0m07.86s system
0m08.16s real     0m00.01s user     0m07.86s system
0m08.15s real     0m00.00s user     0m07.86s system

It was observed through perf that the generic_ci_match function
was called a large number of times, which led to most of the
time being spent on memory allocation and release. Due to a
flush_dcache operation in the implementation of cts_cbc_decrypt,
this memory cannot be allocated on the stack.

Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
---
 fs/f2fs/dir.c    | 24 +++++++++++++++++-------
 fs/f2fs/f2fs.h   |  3 ++-
 fs/f2fs/inline.c |  3 ++-
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4c6611fbd9574..ee0cbeb80debd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -176,6 +176,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 				struct folio *dentry_folio,
 				const struct f2fs_filename *fname,
 				int *max_slots,
+				struct decrypted_name_prealloc *prealloc,
 				bool use_hash)
 {
 	struct f2fs_dentry_block *dentry_blk;
@@ -184,12 +185,13 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 	dentry_blk = folio_address(dentry_folio);
 
 	make_dentry_ptr_block(dir, &d, dentry_blk);
-	return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
+	return f2fs_find_target_dentry(&d, fname, max_slots, prealloc, use_hash);
 }
 
 static inline int f2fs_match_name(const struct inode *dir,
 				   const struct f2fs_filename *fname,
-				   const u8 *de_name, u32 de_name_len)
+				   const u8 *de_name, u32 de_name_len,
+				   struct decrypted_name_prealloc *prealloc)
 {
 	struct fscrypt_name f;
 
@@ -197,7 +199,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 	if (fname->cf_name.name)
 		return generic_ci_match(dir, fname->usr_fname,
 					&fname->cf_name,
-					de_name, de_name_len, NULL);
+					de_name, de_name_len, prealloc);
 
 #endif
 	f.usr_fname = fname->usr_fname;
@@ -210,6 +212,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			const struct f2fs_filename *fname, int *max_slots,
+			struct decrypted_name_prealloc *prealloc,
 			bool use_hash)
 {
 	struct f2fs_dir_entry *de;
@@ -236,7 +239,8 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 		if (!use_hash || de->hash_code == fname->hash) {
 			res = f2fs_match_name(d->inode, fname,
 					      d->filename[bit_pos],
-					      le16_to_cpu(de->name_len));
+					      le16_to_cpu(de->name_len),
+					      prealloc);
 			if (res < 0)
 				return ERR_PTR(res);
 			if (res)
@@ -261,6 +265,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 					unsigned int level,
 					const struct f2fs_filename *fname,
 					struct folio **res_folio,
+					struct decrypted_name_prealloc *prealloc,
 					bool use_hash)
 {
 	int s = GET_DENTRY_SLOTS(fname->disk_name.len);
@@ -296,7 +301,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			}
 		}
 
-		de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash);
+		de = find_in_block(dir, dentry_folio, fname, &max_slots, prealloc,
+				   use_hash);
 		if (IS_ERR(de)) {
 			*res_folio = ERR_CAST(de);
 			de = NULL;
@@ -336,6 +342,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	unsigned int max_depth;
 	unsigned int level;
 	bool use_hash = true;
+	struct decrypted_name_prealloc prealloc = {0};
 
 	*res_folio = NULL;
 
@@ -343,7 +350,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 start_find_entry:
 #endif
 	if (f2fs_has_inline_dentry(dir)) {
-		de = f2fs_find_in_inline_dir(dir, fname, res_folio, use_hash);
+		de = f2fs_find_in_inline_dir(dir, fname, res_folio, &prealloc,
+					     use_hash);
 		goto out;
 	}
 
@@ -359,7 +367,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 	}
 
 	for (level = 0; level < max_depth; level++) {
-		de = find_in_level(dir, level, fname, res_folio, use_hash);
+		de = find_in_level(dir, level, fname, res_folio, &prealloc,
+				   use_hash);
 		if (de || IS_ERR(*res_folio))
 			break;
 	}
@@ -372,6 +381,7 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
 		goto start_find_entry;
 	}
 #endif
+	kfree(prealloc.name);
 	/* This is to increase the speed of f2fs_create */
 	if (!de)
 		F2FS_I(dir)->task = current;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9333a22b9a01e..dfbd2215310fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3673,6 +3673,7 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
 void f2fs_free_filename(struct f2fs_filename *fname);
 struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			const struct f2fs_filename *fname, int *max_slots,
+			struct decrypted_name_prealloc *prealloc,
 			bool use_hash);
 int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 			unsigned int start_pos, struct fscrypt_str *fstr);
@@ -4316,7 +4317,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
 int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio);
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 		const struct f2fs_filename *fname, struct folio **res_folio,
-		bool use_hash);
+		struct decrypted_name_prealloc *prealloc, bool use_hash);
 int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
 			struct folio *ifolio);
 int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 901c630685ced..d02ff6c26d70a 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -353,6 +353,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio)
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 					const struct f2fs_filename *fname,
 					struct folio **res_folio,
+					struct decrypted_name_prealloc *prealloc,
 					bool use_hash)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
@@ -370,7 +371,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 	inline_dentry = inline_data_addr(dir, ifolio);
 
 	make_dentry_ptr_inline(dir, &d, inline_dentry);
-	de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
+	de = f2fs_find_target_dentry(&d, fname, NULL, prealloc, use_hash);
 	folio_unlock(ifolio);
 	if (IS_ERR(de)) {
 		*res_folio = ERR_CAST(de);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
  2025-07-04  2:43   ` [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
@ 2025-07-04  6:02     ` Eric Biggers
  2025-07-07  5:27       ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Eric Biggers @ 2025-07-04  6:02 UTC (permalink / raw)
  To: Yuwen Chen
  Cc: hch, brauner, tytso, linux-kernel, linux-f2fs-devel,
	adilger.kernel, viro, linux-fsdevel, jaegeuk, linux-ext4

On Fri, Jul 04, 2025 at 10:43:57AM +0800, Yuwen Chen wrote:
> During path traversal, the generic_ci_match function may be called
> multiple times. The number of memory allocations and releases
> in it accounts for a relatively high proportion in the flamegraph.
> This patch significantly reduces the number of memory allocations
> in generic_ci_match through pre - allocation.
> 
> Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
> ---
>  fs/ext4/namei.c    |  2 +-
>  fs/f2fs/dir.c      |  2 +-
>  fs/libfs.c         | 33 ++++++++++++++++++++++++++++++---
>  include/linux/fs.h |  8 +++++++-
>  4 files changed, 39 insertions(+), 6 deletions(-)
> 

The reason the allocation is needed at all is because generic_ci_match() has to
decrypt the encrypted on-disk filename from the dentry that it's matching
against.  It can't decrypt in-place, since the source buffer is in the pagecache
which must not be modified.  Hence, a separate destination buffer is needed.

Filenames have a maximum length of NAME_MAX, i.e. 255, bytes.

It would be *much* simpler to just allocate that on the stack.

And we almost can.  255 bytes is on the high end of what can be acceptable to
allocate on the stack in the kernel.  However, here it would give a lot of
benefit and would always occur close to the leaves in the call graph.  So the
size is not a barrier here, IMO.

The real problem is, once again, the legacy crypto_skcipher API, which requires
that the source/destination buffers be provided as scatterlists.  In Linux, the
kernel stack can be in the vmalloc area.  Thus, the buffers passed to
crypto_skcipher cannot be stack buffers unless the caller actually is aware of
how to turn a vmalloc'ed buffer into a scatterlist, which is hard to do.  (See
verity_ahash_update() in drivers/md/dm-verity-target.c for an example.)

Fortunately, I'm currently in the process of introducing library APIs that will
supersede these legacy crypto APIs.  They'll be simpler and faster and won't
have these silly limitations like not working on virtual addresses...  I plan to
make fscrypt use the library APIs instead of the legacy crypto API.

It will take some time to land everything, though.  We can consider this
patchset as a workaround in the mean time.  But it's sad to see the legacy
crypto API continue to cause problems and more time be wasted on these problems.

I do wonder if the "turn a vmalloc'ed buffer into a scatterlist" trick that some
code in the kernel uses is something that would be worth adopting for now in
fname_decrypt().  As I mentioned above, it's hard to do (you have to go page by
page), but it's possible.  That would allow immediately moving
generic_ci_match() to use a stack allocation, which would avoid adding all the
complexity of the preallocation that you have in this patchset.

- Eric

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
  2025-07-04  6:02     ` Eric Biggers
@ 2025-07-07  5:27       ` Christoph Hellwig
  2025-07-08  7:11         ` Yuwen Chen
       [not found]         ` <tencent_3B7E5FEB5DB445A5DC50E3F3AE4DE72A7908@qq.com>
  0 siblings, 2 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-07-07  5:27 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Yuwen Chen, hch, brauner, tytso, linux-kernel, linux-f2fs-devel,
	adilger.kernel, viro, linux-fsdevel, jaegeuk, linux-ext4

On Thu, Jul 03, 2025 at 11:02:59PM -0700, Eric Biggers wrote:

[Can you trim your replies to the usual 73 characters?  The long lines
make them quite hard to read without first reflowing them]

> The real problem is, once again, the legacy crypto_skcipher API, which requires
> that the source/destination buffers be provided as scatterlists.  In Linux, the
> kernel stack can be in the vmalloc area.  Thus, the buffers passed to
> crypto_skcipher cannot be stack buffers unless the caller actually is aware of
> how to turn a vmalloc'ed buffer into a scatterlist, which is hard to do.  (See
> verity_ahash_update() in drivers/md/dm-verity-target.c for an example.)

I don't think setting up a scatterlist for vmalloc data is hard.  But it is
extra boilerplate code that is rather annoying and adds overhead.

> code in the kernel uses is something that would be worth adopting for
> now in fname_decrypt().  As I mentioned above, it's hard to do (you
> have to go page by page), but it's possible.  That would allow
> immediately moving generic_ci_match() to use a stack allocation, which
> would avoid adding all the complexity of the preallocation that you
> have in this patchset.

I suspect that all the overhead required for that get close to that of a
memory allocation.

But I wonder why generic_ci_match is even called that often.  Both ext4
and f2fs support hashed lookups, so you should usually only see it called
for the main match, plus the occasional hash false positive, which should
be rate if the hash works.

Yuwen, are you using f2fs in the mode where it does a linear scan on a
hash lookup miss?  That was added as a workaround for the utf8 code point
changes, but is a completely broken idea the defeats hashed lookups and
IIRC only was default for a very short time.

Note that even with this fixed, using an on-stack allocation would be
nice eventually when moving the crypto library API, as it would still
avoid the allocation entirely.  But caching shouldn't be worth it if the
number of generic_ci_match per lookup is just slightly above 1.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 1/2] libfs: reduce the number of memory allocations in generic_ci_match
  2025-07-03  8:21 [PATCH] f2fs: improve the performance of f2fs_lookup Yuwen Chen
  2025-07-03  8:56 ` Christoph Hellwig
  2025-07-03  9:54 ` [PATCH v2] " Yuwen Chen
@ 2025-07-08  6:41 ` Yuwen Chen
  2025-07-09  2:42 ` [PATCH] f2fs: improve the performance of f2fs_lookup kernel test robot
  3 siblings, 0 replies; 12+ messages in thread
From: Yuwen Chen @ 2025-07-08  6:41 UTC (permalink / raw)
  To: ywen.chen
  Cc: adilger.kernel, brauner, chao, jaegeuk, linux-ext4,
	linux-f2fs-devel, linux-fsdevel, linux-kernel, tytso, viro

During path traversal, the generic_ci_match function may be called
multiple times. The number of memory allocations and releases
in it accounts for a relatively high proportion in the flamegraph.
This patch significantly reduces the number of memory allocations
in generic_ci_match through pre - allocation.

Signed-off-by: Yuwen Chen <ywen.chen@foxmail.com>
---
 fs/ext4/namei.c    |  2 +-
 fs/f2fs/dir.c      |  2 +-
 fs/libfs.c         | 33 ++++++++++++++++++++++++++++++---
 include/linux/fs.h |  8 +++++++-
 4 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a178ac2294895..f235693bd71aa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1443,7 +1443,7 @@ static bool ext4_match(struct inode *parent,
 
 		return generic_ci_match(parent, fname->usr_fname,
 					&fname->cf_name, de->name,
-					de->name_len) > 0;
+					de->name_len, NULL) > 0;
 	}
 #endif
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c36b3b22bfffd..4c6611fbd9574 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -197,7 +197,7 @@ static inline int f2fs_match_name(const struct inode *dir,
 	if (fname->cf_name.name)
 		return generic_ci_match(dir, fname->usr_fname,
 					&fname->cf_name,
-					de_name, de_name_len);
+					de_name, de_name_len, NULL);
 
 #endif
 	f.usr_fname = fname->usr_fname;
diff --git a/fs/libfs.c b/fs/libfs.c
index 9ea0ecc325a81..293b605971bbf 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1863,6 +1863,26 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 #endif
 };
 
+#define DECRYPTED_NAME_PREALLOC_MIN_LEN 64
+static inline char *decrypted_name_prealloc_resize(
+		struct decrypted_name_prealloc *prealloc,
+		size_t wantlen)
+{
+	char *retbuf = NULL;
+
+	if (prealloc->name && wantlen <= prealloc->namelen)
+		return prealloc->name;
+
+	retbuf = kmalloc(wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN, GFP_KERNEL);
+	if (!retbuf)
+		return NULL;
+
+	kfree(prealloc->name);
+	prealloc->name = retbuf;
+	prealloc->namelen = wantlen + DECRYPTED_NAME_PREALLOC_MIN_LEN;
+	return retbuf;
+}
+
 /**
  * generic_ci_match() - Match a name (case-insensitively) with a dirent.
  * This is a filesystem helper for comparison with directory entries.
@@ -1873,6 +1893,7 @@ static const struct dentry_operations generic_ci_dentry_ops = {
  * @folded_name: Optional pre-folded name under lookup
  * @de_name: Dirent name.
  * @de_name_len: dirent name length.
+ * @prealloc: decrypted name memory buffer
  *
  * Test whether a case-insensitive directory entry matches the filename
  * being searched.  If @folded_name is provided, it is used instead of
@@ -1884,7 +1905,8 @@ static const struct dentry_operations generic_ci_dentry_ops = {
 int generic_ci_match(const struct inode *parent,
 		     const struct qstr *name,
 		     const struct qstr *folded_name,
-		     const u8 *de_name, u32 de_name_len)
+		     const u8 *de_name, u32 de_name_len,
+		     struct decrypted_name_prealloc *prealloc)
 {
 	const struct super_block *sb = parent->i_sb;
 	const struct unicode_map *um = sb->s_encoding;
@@ -1899,7 +1921,11 @@ int generic_ci_match(const struct inode *parent,
 		if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
 			return -EINVAL;
 
-		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		if (!prealloc)
+			decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		else
+			decrypted_name.name = decrypted_name_prealloc_resize(
+					prealloc, de_name_len);
 		if (!decrypted_name.name)
 			return -ENOMEM;
 		res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
@@ -1928,7 +1954,8 @@ int generic_ci_match(const struct inode *parent,
 		res = utf8_strncasecmp(um, name, &dirent);
 
 out:
-	kfree(decrypted_name.name);
+	if (!prealloc)
+		kfree(decrypted_name.name);
 	if (res < 0 && sb_has_strict_encoding(sb)) {
 		pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
 		return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ec77da65f144..65307c8c11485 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3651,10 +3651,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_check_addressable(unsigned, u64);
 
 extern void generic_set_sb_d_ops(struct super_block *sb);
+
+struct decrypted_name_prealloc {
+	char *name;
+	size_t namelen;
+};
 extern int generic_ci_match(const struct inode *parent,
 			    const struct qstr *name,
 			    const struct qstr *folded_name,
-			    const u8 *de_name, u32 de_name_len);
+			    const u8 *de_name, u32 de_name_len,
+			    struct decrypted_name_prealloc *prealloc);
 
 #if IS_ENABLED(CONFIG_UNICODE)
 int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
@ 2025-07-08  6:52 ywen.chen
  0 siblings, 0 replies; 12+ messages in thread
From: ywen.chen @ 2025-07-08  6:52 UTC (permalink / raw)
  To: Christoph Hellwig, Eric Biggers
  Cc: Christoph Hellwig, brauner, tytso, linux-kernel, linux-f2fs-devel,
	adilger.kernel, viro, linux-fsdevel, jaegeuk, linux-ext4

&gt; But I wonder why generic_ci_match is even called that often.  Both ext4
&gt; and f2fs support hashed lookups, so you should usually only see it called
&gt; for the main match, plus the occasional hash false positive, which should
&gt; be rate if the hash works.


At present, in the latest version of Linux, in some scenarios,
f2fs still uses linear search.


The logic of linear search was introduced by Commit 91b587ba79e1
(f2fs: Introduce linear search for dentries). Commit 91b587ba79e1
was designed to solve the problem of inconsistent hashes before
and after the rollback of Commit 5c26d2f1d3f5
("unicode: Don't special case ignorable code points"),
which led to files being inaccessible.


In order to reduce the impact of linear search, in relatively new
versions, the logic of turning off linear search has also been
introduced. However, the triggering conditions for this
turn - off logic on f2fs are rather strict:


1. Use the latest version of the fsck.f2fs tool to correct
the file system.
2. Use a relatively new version of the kernel. (For example,
linear search cannot be turned off in v6.6)


The performance gain of this commit is very obvious in scenarios
where linear search is not turned off. In scenarios where linear
search is turned off, no performance problems will be introduced
either.<br>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
  2025-07-07  5:27       ` Christoph Hellwig
@ 2025-07-08  7:11         ` Yuwen Chen
       [not found]         ` <tencent_3B7E5FEB5DB445A5DC50E3F3AE4DE72A7908@qq.com>
  1 sibling, 0 replies; 12+ messages in thread
From: Yuwen Chen @ 2025-07-08  7:11 UTC (permalink / raw)
  To: hch
  Cc: adilger.kernel, brauner, ebiggers, jaegeuk, linux-ext4,
	linux-f2fs-devel, linux-fsdevel, linux-kernel, tytso, viro,
	ywen.chen

On Sun, 6 Jul 2025 22:27:17 -0700 Christoph Hellwig wrote:
> But I wonder why generic_ci_match is even called that often.  Both ext4
> and f2fs support hashed lookups, so you should usually only see it called
> for the main match, plus the occasional hash false positive, which should
> be rate if the hash works.

At present, in the latest version of Linux, in some scenarios,
f2fs still uses linear search.

The logic of linear search was introduced by Commit 91b587ba79e1
(f2fs: Introduce linear search for dentries). Commit 91b587ba79e1
was designed to solve the problem of inconsistent hashes before
and after the rollback of Commit 5c26d2f1d3f5
("unicode: Don't special case ignorable code points"),
which led to files being inaccessible.

In order to reduce the impact of linear search, in relatively new
versions, the logic of turning off linear search has also been
introduced. However, the triggering conditions for this
turn - off logic on f2fs are rather strict:

1. Use the latest version of the fsck.f2fs tool to correct
the file system.
2. Use a relatively new version of the kernel. (For example,
linear search cannot be turned off in v6.6)

The performance gain of this commit is very obvious in scenarios
where linear search is not turned off. In scenarios where linear
search is turned off, no performance problems will be introduced
either.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: 回复: [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match
       [not found]         ` <tencent_3B7E5FEB5DB445A5DC50E3F3AE4DE72A7908@qq.com>
@ 2025-07-08  9:40           ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2025-07-08  9:40 UTC (permalink / raw)
  To: ywen.chen
  Cc: Christoph Hellwig, Eric Biggers, brauner, tytso, linux-kernel,
	linux-f2fs-devel, adilger.kernel, viro, linux-fsdevel, jaegeuk,
	linux-ext4

On Tue, Jul 08, 2025 at 10:36:07AM +0800, ywen.chen wrote:
> 1. Use the latest version of the fsck.f2fs tool to correct
> the file system.
> 2. Use a relatively new version of the kernel. (For example,
> linear search cannot be turned off in v6.6)
> 
> 
> The performance gain of this commit is very obvious in scenarios
> where linear search is not turned off. In scenarios where linear
> search is turned off, no performance problems will be introduced
> either.

Turning off hashed lookups was a stupid idea and should not last.  Hashed
directory lookups are the one file system improvement that significantly
improved lookup performance.  So don't work around it, because otherwise
you will be creating more optimizations to work around the lack of
hashed lookups.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] f2fs: improve the performance of f2fs_lookup
  2025-07-03  8:21 [PATCH] f2fs: improve the performance of f2fs_lookup Yuwen Chen
                   ` (2 preceding siblings ...)
  2025-07-08  6:41 ` [PATCH v4 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
@ 2025-07-09  2:42 ` kernel test robot
  3 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2025-07-09  2:42 UTC (permalink / raw)
  To: Yuwen Chen, tytso, adilger.kernel, jaegeuk, chao, viro, brauner
  Cc: oe-kbuild-all, linux-ext4, linux-kernel, linux-f2fs-devel,
	linux-fsdevel, Yuwen Chen

Hi Yuwen,

kernel test robot noticed the following build warnings:

[auto build test WARNING on jaegeuk-f2fs/dev-test]
[also build test WARNING on jaegeuk-f2fs/dev brauner-vfs/vfs.all linus/master v6.16-rc5 next-20250708]
[cannot apply to tytso-ext4/dev]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Yuwen-Chen/f2fs-improve-the-performance-of-f2fs_lookup/20250708-184528
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
patch link:    https://lore.kernel.org/r/tencent_0D8BB6ABAB0880DB7BFCCE35EDBC3DCFF505%40qq.com
patch subject: [PATCH] f2fs: improve the performance of f2fs_lookup
config: arc-randconfig-001-20250709 (https://download.01.org/0day-ci/archive/20250709/202507091026.yb48YXt5-lkp@intel.com/config)
compiler: arc-linux-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250709/202507091026.yb48YXt5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507091026.yb48YXt5-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> Warning: fs/libfs.c:1908 function parameter 'prealloc' not described in 'generic_ci_match'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-07-09  2:42 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-03  8:21 [PATCH] f2fs: improve the performance of f2fs_lookup Yuwen Chen
2025-07-03  8:56 ` Christoph Hellwig
2025-07-04  2:43   ` [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
2025-07-04  6:02     ` Eric Biggers
2025-07-07  5:27       ` Christoph Hellwig
2025-07-08  7:11         ` Yuwen Chen
     [not found]         ` <tencent_3B7E5FEB5DB445A5DC50E3F3AE4DE72A7908@qq.com>
2025-07-08  9:40           ` 回复: " Christoph Hellwig
2025-07-04  2:44   ` [PATCH v3 2/2] f2fs: improve the performance of f2fs_lookup Yuwen Chen
2025-07-03  9:54 ` [PATCH v2] " Yuwen Chen
2025-07-08  6:41 ` [PATCH v4 1/2] libfs: reduce the number of memory allocations in generic_ci_match Yuwen Chen
2025-07-09  2:42 ` [PATCH] f2fs: improve the performance of f2fs_lookup kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2025-07-08  6:52 [PATCH v3 1/2] libfs: reduce the number of memory allocations in generic_ci_match ywen.chen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).