linux-f2fs-devel.lists.sourceforge.net archive mirror
 help / color / mirror / Atom feed
* [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches
@ 2024-11-22  6:50 Chao Yu via Linux-f2fs-devel
  2024-11-22  6:50 ` [f2fs-dev] [PATCH v2 2/2] f2fs: add a sysfs node to limit max read extent count per-inode Chao Yu via Linux-f2fs-devel
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Chao Yu via Linux-f2fs-devel @ 2024-11-22  6:50 UTC (permalink / raw)
  To: jaegeuk; +Cc: Xiuhong Wang, Zhiguo Niu, linux-kernel, linux-f2fs-devel

We use rwlock to protect core structure data of extent tree during
its shrink, however, if there is a huge number of extent nodes in
extent tree, during shrink of extent tree, it may hold rwlock for
a very long time, which may trigger kernel hang issue.

This patch fixes to shrink read extent node in batches, so that,
critical region of the rwlock can be shrunk to avoid its extreme
long time hold.

Reported-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
Closes: https://lore.kernel.org/linux-f2fs-devel/20241112110627.1314632-1-xiuhong.wang@unisoc.com/
Signed-off-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Chao Yu <chao@kernel.org>
---
v2:
- no updates.
 fs/f2fs/extent_cache.c | 69 +++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 019c1f7b7fa5..b7a6817b44b0 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -379,21 +379,22 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode,
 }
 
 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
-					struct extent_tree *et)
+				struct extent_tree *et, unsigned int nr_shrink)
 {
 	struct rb_node *node, *next;
 	struct extent_node *en;
-	unsigned int count = atomic_read(&et->node_cnt);
+	unsigned int count;
 
 	node = rb_first_cached(&et->root);
-	while (node) {
+
+	for (count = 0; node && count < nr_shrink; count++) {
 		next = rb_next(node);
 		en = rb_entry(node, struct extent_node, rb_node);
 		__release_extent_node(sbi, et, en);
 		node = next;
 	}
 
-	return count - atomic_read(&et->node_cnt);
+	return count;
 }
 
 static void __drop_largest_extent(struct extent_tree *et,
@@ -622,6 +623,30 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 	return en;
 }
 
+static unsigned int __destroy_extent_node(struct inode *inode,
+					enum extent_type type)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
+	unsigned int nr_shrink = type == EX_READ ?
+				READ_EXTENT_CACHE_SHRINK_NUMBER :
+				AGE_EXTENT_CACHE_SHRINK_NUMBER;
+	unsigned int node_cnt = 0;
+
+	if (!et || !atomic_read(&et->node_cnt))
+		return 0;
+
+	while (atomic_read(&et->node_cnt)) {
+		write_lock(&et->lock);
+		node_cnt += __free_extent_tree(sbi, et, nr_shrink);
+		write_unlock(&et->lock);
+	}
+
+	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
+
+	return node_cnt;
+}
+
 static void __update_extent_tree_range(struct inode *inode,
 			struct extent_info *tei, enum extent_type type)
 {
@@ -760,9 +785,6 @@ static void __update_extent_tree_range(struct inode *inode,
 		}
 	}
 
-	if (is_inode_flag_set(inode, FI_NO_EXTENT))
-		__free_extent_tree(sbi, et);
-
 	if (et->largest_updated) {
 		et->largest_updated = false;
 		updated = true;
@@ -780,6 +802,9 @@ static void __update_extent_tree_range(struct inode *inode,
 out_read_extent_cache:
 	write_unlock(&et->lock);
 
+	if (is_inode_flag_set(inode, FI_NO_EXTENT))
+		__destroy_extent_node(inode, EX_READ);
+
 	if (updated)
 		f2fs_mark_inode_dirty_sync(inode, true);
 }
@@ -942,10 +967,14 @@ static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink
 	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 		if (atomic_read(&et->node_cnt)) {
 			write_lock(&et->lock);
-			node_cnt += __free_extent_tree(sbi, et);
+			node_cnt += __free_extent_tree(sbi, et,
+					nr_shrink - node_cnt - tree_cnt);
 			write_unlock(&et->lock);
 		}
-		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
+
+		if (atomic_read(&et->node_cnt))
+			goto unlock_out;
+
 		list_del_init(&et->list);
 		radix_tree_delete(&eti->extent_tree_root, et->ino);
 		kmem_cache_free(extent_tree_slab, et);
@@ -1084,23 +1113,6 @@ unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink
 	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
 }
 
-static unsigned int __destroy_extent_node(struct inode *inode,
-					enum extent_type type)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
-	unsigned int node_cnt = 0;
-
-	if (!et || !atomic_read(&et->node_cnt))
-		return 0;
-
-	write_lock(&et->lock);
-	node_cnt = __free_extent_tree(sbi, et);
-	write_unlock(&et->lock);
-
-	return node_cnt;
-}
-
 void f2fs_destroy_extent_node(struct inode *inode)
 {
 	__destroy_extent_node(inode, EX_READ);
@@ -1109,7 +1121,6 @@ void f2fs_destroy_extent_node(struct inode *inode)
 
 static void __drop_extent_tree(struct inode *inode, enum extent_type type)
 {
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	bool updated = false;
 
@@ -1117,7 +1128,6 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
 		return;
 
 	write_lock(&et->lock);
-	__free_extent_tree(sbi, et);
 	if (type == EX_READ) {
 		set_inode_flag(inode, FI_NO_EXTENT);
 		if (et->largest.len) {
@@ -1126,6 +1136,9 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
 		}
 	}
 	write_unlock(&et->lock);
+
+	__destroy_extent_node(inode, type);
+
 	if (updated)
 		f2fs_mark_inode_dirty_sync(inode, true);
 }
-- 
2.40.1



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [f2fs-dev] [PATCH v2 2/2] f2fs: add a sysfs node to limit max read extent count per-inode
  2024-11-22  6:50 [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches Chao Yu via Linux-f2fs-devel
@ 2024-11-22  6:50 ` Chao Yu via Linux-f2fs-devel
  2024-11-23 15:50 ` [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches patchwork-bot+f2fs--- via Linux-f2fs-devel
  2024-11-25  3:11 ` [f2fs-dev] 答复: " 王秀红 (Xiuhong Wang)
  2 siblings, 0 replies; 5+ messages in thread
From: Chao Yu via Linux-f2fs-devel @ 2024-11-22  6:50 UTC (permalink / raw)
  To: jaegeuk; +Cc: Xiuhong Wang, Zhiguo Niu, linux-kernel, linux-f2fs-devel

Quoted:
"at this time, there are still 1086911 extent nodes in this zombie
extent tree that need to be cleaned up.

crash_arm64_sprd_v8.0.3++> extent_tree.node_cnt ffffff80896cc500
  node_cnt = {
    counter = 1086911
  },
"

As reported by Xiuhong, there will be a huge number of extent nodes
in extent tree, it may potentially cause:
- slab memory fragments
- extreme long time shrink on extent tree
- low mapping efficiency

Let's add a sysfs node to limit max read extent count for each inode,
by default, value of this threshold is 10240, it can be updated
according to user's requirement.

Reported-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
Closes: https://lore.kernel.org/linux-f2fs-devel/20241112110627.1314632-1-xiuhong.wang@unisoc.com/
Signed-off-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Chao Yu <chao@kernel.org>
---
v2:
- fix to add missing max_read_extent_count sysfs entry declaration
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ++++++
 fs/f2fs/extent_cache.c                  |  5 ++++-
 fs/f2fs/f2fs.h                          |  4 ++++
 fs/f2fs/sysfs.c                         | 10 ++++++++++
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 513296bb6f29..3e1630c70d8a 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -822,3 +822,9 @@ Description:	It controls the valid block ratio threshold not to trigger excessiv
 		for zoned deivces. The initial value of it is 95(%). F2FS will stop the
 		background GC thread from intiating GC for sections having valid blocks
 		exceeding the ratio.
+
+What:		/sys/fs/f2fs/<disk>/max_read_extent_count
+Date:		November 2024
+Contact:	"Chao Yu" <chao@kernel.org>
+Description:	It controls max read extent count for per-inode, the value of threshold
+		is 10240 by default.
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index b7a6817b44b0..347b3b647834 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -717,7 +717,9 @@ static void __update_extent_tree_range(struct inode *inode,
 		}
 
 		if (end < org_end && (type != EX_READ ||
-				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
+			(org_end - end >= F2FS_MIN_EXTENT_LEN &&
+			atomic_read(&et->node_cnt) <
+					sbi->max_read_extent_count))) {
 			if (parts) {
 				__set_extent_info(&ei,
 					end, org_end - end,
@@ -1212,6 +1214,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
 	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
 	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
 	sbi->last_age_weight = LAST_AGE_WEIGHT;
+	sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT;
 }
 
 int __init f2fs_create_extent_cache(void)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b65b023a588a..6f2cbf4c5740 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -635,6 +635,9 @@ enum {
 #define DEF_HOT_DATA_AGE_THRESHOLD	262144
 #define DEF_WARM_DATA_AGE_THRESHOLD	2621440
 
+/* default max read extent count per inode */
+#define DEF_MAX_READ_EXTENT_COUNT	10240
+
 /* extent cache type */
 enum extent_type {
 	EX_READ,
@@ -1619,6 +1622,7 @@ struct f2fs_sb_info {
 	/* for extent tree cache */
 	struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
 	atomic64_t allocated_data_blocks;	/* for block age extent_cache */
+	unsigned int max_read_extent_count;	/* max read extent count per inode */
 
 	/* The threshold used for hot and warm data seperation*/
 	unsigned int hot_data_age_threshold;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index bdbf24db667b..6b99dc49f776 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -787,6 +787,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "max_read_extent_count")) {
+		if (t > UINT_MAX)
+			return -EINVAL;
+		*ui = (unsigned int)t;
+		return count;
+	}
+
 	if (!strcmp(a->attr.name, "ipu_policy")) {
 		if (t >= BIT(F2FS_IPU_MAX))
 			return -EINVAL;
@@ -1052,6 +1059,8 @@ F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block);
 F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold);
 F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold);
 F2FS_SBI_GENERAL_RW_ATTR(last_age_weight);
+/* read extent cache */
+F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count);
 #ifdef CONFIG_BLK_DEV_ZONED
 F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec);
 F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
@@ -1242,6 +1251,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(hot_data_age_threshold),
 	ATTR_LIST(warm_data_age_threshold),
 	ATTR_LIST(last_age_weight),
+	ATTR_LIST(max_read_extent_count),
 	NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
-- 
2.40.1



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches
  2024-11-22  6:50 [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches Chao Yu via Linux-f2fs-devel
  2024-11-22  6:50 ` [f2fs-dev] [PATCH v2 2/2] f2fs: add a sysfs node to limit max read extent count per-inode Chao Yu via Linux-f2fs-devel
@ 2024-11-23 15:50 ` patchwork-bot+f2fs--- via Linux-f2fs-devel
  2024-11-25  3:11 ` [f2fs-dev] 答复: " 王秀红 (Xiuhong Wang)
  2 siblings, 0 replies; 5+ messages in thread
From: patchwork-bot+f2fs--- via Linux-f2fs-devel @ 2024-11-23 15:50 UTC (permalink / raw)
  To: Chao Yu; +Cc: xiuhong.wang, jaegeuk, zhiguo.niu, linux-kernel, linux-f2fs-devel

Hello:

This series was applied to jaegeuk/f2fs.git (dev)
by Jaegeuk Kim <jaegeuk@kernel.org>:

On Fri, 22 Nov 2024 14:50:04 +0800 you wrote:
> We use rwlock to protect core structure data of extent tree during
> its shrink, however, if there is a huge number of extent nodes in
> extent tree, during shrink of extent tree, it may hold rwlock for
> a very long time, which may trigger kernel hang issue.
> 
> This patch fixes to shrink read extent node in batches, so that,
> critical region of the rwlock can be shrunk to avoid its extreme
> long time hold.
> 
> [...]

Here is the summary with links:
  - [f2fs-dev,v2,1/2] f2fs: fix to shrink read extent node in batches
    https://git.kernel.org/jaegeuk/f2fs/c/3fc5d5a182f6
  - [f2fs-dev,v2,2/2] f2fs: add a sysfs node to limit max read extent count per-inode
    https://git.kernel.org/jaegeuk/f2fs/c/009a8241a8e5

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [f2fs-dev] 答复: [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches
  2024-11-22  6:50 [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches Chao Yu via Linux-f2fs-devel
  2024-11-22  6:50 ` [f2fs-dev] [PATCH v2 2/2] f2fs: add a sysfs node to limit max read extent count per-inode Chao Yu via Linux-f2fs-devel
  2024-11-23 15:50 ` [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches patchwork-bot+f2fs--- via Linux-f2fs-devel
@ 2024-11-25  3:11 ` 王秀红 (Xiuhong Wang)
  2024-11-25 11:13   ` Chao Yu via Linux-f2fs-devel
  2 siblings, 1 reply; 5+ messages in thread
From: 王秀红 (Xiuhong Wang) @ 2024-11-25  3:11 UTC (permalink / raw)
  To: Chao Yu, jaegeuk@kernel.org
  Cc: 牛志国 (Zhiguo Niu),
	linux-kernel@vger.kernel.org,
	linux-f2fs-devel@lists.sourceforge.net

Hi Chao,

after tested in this weekend with these patch base on the orginal case, no issue reproduced, so
Tested-by: Xiuhong Wang <xiuhong.wang@unisoc.com>

thanks!

-----邮件原件-----
发件人: Chao Yu <chao@kernel.org> 
发送时间: 2024年11月22日 14:50
收件人: jaegeuk@kernel.org
抄送: linux-f2fs-devel@lists.sourceforge.net; linux-kernel@vger.kernel.org; Chao Yu <chao@kernel.org>; 王秀红 (Xiuhong Wang) <Xiuhong.Wang@unisoc.com>; 牛志国 (Zhiguo Niu) <Zhiguo.Niu@unisoc.com>
主题: [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches


注意: 这封邮件来自于外部。除非你确定邮件内容安全,否则不要点击任何链接和附件。
CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.



We use rwlock to protect core structure data of extent tree during its shrink, however, if there is a huge number of extent nodes in extent tree, during shrink of extent tree, it may hold rwlock for a very long time, which may trigger kernel hang issue.

This patch fixes to shrink read extent node in batches, so that, critical region of the rwlock can be shrunk to avoid its extreme long time hold.

Reported-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
Closes: https://lore.kernel.org/linux-f2fs-devel/20241112110627.1314632-1-xiuhong.wang@unisoc.com/
Signed-off-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Chao Yu <chao@kernel.org>
---
v2:
- no updates.
 fs/f2fs/extent_cache.c | 69 +++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 019c1f7b7fa5..b7a6817b44b0 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -379,21 +379,22 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode,  }

 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
-                                       struct extent_tree *et)
+                               struct extent_tree *et, unsigned int 
+ nr_shrink)
 {
        struct rb_node *node, *next;
        struct extent_node *en;
-       unsigned int count = atomic_read(&et->node_cnt);
+       unsigned int count;

        node = rb_first_cached(&et->root);
-       while (node) {
+
+       for (count = 0; node && count < nr_shrink; count++) {
                next = rb_next(node);
                en = rb_entry(node, struct extent_node, rb_node);
                __release_extent_node(sbi, et, en);
                node = next;
        }

-       return count - atomic_read(&et->node_cnt);
+       return count;
 }

 static void __drop_largest_extent(struct extent_tree *et, @@ -622,6 +623,30 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
        return en;
 }

+static unsigned int __destroy_extent_node(struct inode *inode,
+                                       enum extent_type type) {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
+       unsigned int nr_shrink = type == EX_READ ?
+                               READ_EXTENT_CACHE_SHRINK_NUMBER :
+                               AGE_EXTENT_CACHE_SHRINK_NUMBER;
+       unsigned int node_cnt = 0;
+
+       if (!et || !atomic_read(&et->node_cnt))
+               return 0;
+
+       while (atomic_read(&et->node_cnt)) {
+               write_lock(&et->lock);
+               node_cnt += __free_extent_tree(sbi, et, nr_shrink);
+               write_unlock(&et->lock);
+       }
+
+       f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
+
+       return node_cnt;
+}
+
 static void __update_extent_tree_range(struct inode *inode,
                        struct extent_info *tei, enum extent_type type)  { @@ -760,9 +785,6 @@ static void __update_extent_tree_range(struct inode *inode,
                }
        }

-       if (is_inode_flag_set(inode, FI_NO_EXTENT))
-               __free_extent_tree(sbi, et);
-
        if (et->largest_updated) {
                et->largest_updated = false;
                updated = true;
@@ -780,6 +802,9 @@ static void __update_extent_tree_range(struct inode *inode,
 out_read_extent_cache:
        write_unlock(&et->lock);

+       if (is_inode_flag_set(inode, FI_NO_EXTENT))
+               __destroy_extent_node(inode, EX_READ);
+
        if (updated)
                f2fs_mark_inode_dirty_sync(inode, true);  } @@ -942,10 +967,14 @@ static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink
        list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
                if (atomic_read(&et->node_cnt)) {
                        write_lock(&et->lock);
-                       node_cnt += __free_extent_tree(sbi, et);
+                       node_cnt += __free_extent_tree(sbi, et,
+                                       nr_shrink - node_cnt - 
+ tree_cnt);
                        write_unlock(&et->lock);
                }
-               f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
+
+               if (atomic_read(&et->node_cnt))
+                       goto unlock_out;
+
                list_del_init(&et->list);
                radix_tree_delete(&eti->extent_tree_root, et->ino);
                kmem_cache_free(extent_tree_slab, et); @@ -1084,23 +1113,6 @@ unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink
        return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);  }

-static unsigned int __destroy_extent_node(struct inode *inode,
-                                       enum extent_type type)
-{
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
-       unsigned int node_cnt = 0;
-
-       if (!et || !atomic_read(&et->node_cnt))
-               return 0;
-
-       write_lock(&et->lock);
-       node_cnt = __free_extent_tree(sbi, et);
-       write_unlock(&et->lock);
-
-       return node_cnt;
-}
-
 void f2fs_destroy_extent_node(struct inode *inode)  {
        __destroy_extent_node(inode, EX_READ); @@ -1109,7 +1121,6 @@ void f2fs_destroy_extent_node(struct inode *inode)

 static void __drop_extent_tree(struct inode *inode, enum extent_type type)  {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
        bool updated = false;

@@ -1117,7 +1128,6 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
                return;

        write_lock(&et->lock);
-       __free_extent_tree(sbi, et);
        if (type == EX_READ) {
                set_inode_flag(inode, FI_NO_EXTENT);
                if (et->largest.len) {
@@ -1126,6 +1136,9 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
                }
        }
        write_unlock(&et->lock);
+
+       __destroy_extent_node(inode, type);
+
        if (updated)
                f2fs_mark_inode_dirty_sync(inode, true);  }
--
2.40.1


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [f2fs-dev] 答复: [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches
  2024-11-25  3:11 ` [f2fs-dev] 答复: " 王秀红 (Xiuhong Wang)
@ 2024-11-25 11:13   ` Chao Yu via Linux-f2fs-devel
  0 siblings, 0 replies; 5+ messages in thread
From: Chao Yu via Linux-f2fs-devel @ 2024-11-25 11:13 UTC (permalink / raw)
  To: 王秀红 (Xiuhong Wang), jaegeuk@kernel.org
  Cc: 牛志国 (Zhiguo Niu),
	linux-kernel@vger.kernel.org,
	linux-f2fs-devel@lists.sourceforge.net

On 2024/11/25 11:11, 王秀红 (Xiuhong Wang) wrote:
> Hi Chao,
> 
> after tested in this weekend with these patch base on the orginal case, no issue reproduced, so
> Tested-by: Xiuhong Wang <xiuhong.wang@unisoc.com>

Hi Xiuhong,

Thanks for helping to test.

Thanks,

> 
> thanks!
> 
> -----邮件原件-----
> 发件人: Chao Yu <chao@kernel.org>
> 发送时间: 2024年11月22日 14:50
> 收件人: jaegeuk@kernel.org
> 抄送: linux-f2fs-devel@lists.sourceforge.net; linux-kernel@vger.kernel.org; Chao Yu <chao@kernel.org>; 王秀红 (Xiuhong Wang) <Xiuhong.Wang@unisoc.com>; 牛志国 (Zhiguo Niu) <Zhiguo.Niu@unisoc.com>
> 主题: [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches
> 
> 
> 注意: 这封邮件来自于外部。除非你确定邮件内容安全,否则不要点击任何链接和附件。
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> 
> We use rwlock to protect core structure data of extent tree during its shrink, however, if there is a huge number of extent nodes in extent tree, during shrink of extent tree, it may hold rwlock for a very long time, which may trigger kernel hang issue.
> 
> This patch fixes to shrink read extent node in batches, so that, critical region of the rwlock can be shrunk to avoid its extreme long time hold.
> 
> Reported-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
> Closes: https://lore.kernel.org/linux-f2fs-devel/20241112110627.1314632-1-xiuhong.wang@unisoc.com/
> Signed-off-by: Xiuhong Wang <xiuhong.wang@unisoc.com>
> Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
> v2:
> - no updates.
>   fs/f2fs/extent_cache.c | 69 +++++++++++++++++++++++++-----------------
>   1 file changed, 41 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 019c1f7b7fa5..b7a6817b44b0 100644
> --- a/fs/f2fs/extent_cache.c
> +++ b/fs/f2fs/extent_cache.c
> @@ -379,21 +379,22 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode,  }
> 
>   static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
> -                                       struct extent_tree *et)
> +                               struct extent_tree *et, unsigned int
> + nr_shrink)
>   {
>          struct rb_node *node, *next;
>          struct extent_node *en;
> -       unsigned int count = atomic_read(&et->node_cnt);
> +       unsigned int count;
> 
>          node = rb_first_cached(&et->root);
> -       while (node) {
> +
> +       for (count = 0; node && count < nr_shrink; count++) {
>                  next = rb_next(node);
>                  en = rb_entry(node, struct extent_node, rb_node);
>                  __release_extent_node(sbi, et, en);
>                  node = next;
>          }
> 
> -       return count - atomic_read(&et->node_cnt);
> +       return count;
>   }
> 
>   static void __drop_largest_extent(struct extent_tree *et, @@ -622,6 +623,30 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
>          return en;
>   }
> 
> +static unsigned int __destroy_extent_node(struct inode *inode,
> +                                       enum extent_type type) {
> +       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +       struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
> +       unsigned int nr_shrink = type == EX_READ ?
> +                               READ_EXTENT_CACHE_SHRINK_NUMBER :
> +                               AGE_EXTENT_CACHE_SHRINK_NUMBER;
> +       unsigned int node_cnt = 0;
> +
> +       if (!et || !atomic_read(&et->node_cnt))
> +               return 0;
> +
> +       while (atomic_read(&et->node_cnt)) {
> +               write_lock(&et->lock);
> +               node_cnt += __free_extent_tree(sbi, et, nr_shrink);
> +               write_unlock(&et->lock);
> +       }
> +
> +       f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
> +
> +       return node_cnt;
> +}
> +
>   static void __update_extent_tree_range(struct inode *inode,
>                          struct extent_info *tei, enum extent_type type)  { @@ -760,9 +785,6 @@ static void __update_extent_tree_range(struct inode *inode,
>                  }
>          }
> 
> -       if (is_inode_flag_set(inode, FI_NO_EXTENT))
> -               __free_extent_tree(sbi, et);
> -
>          if (et->largest_updated) {
>                  et->largest_updated = false;
>                  updated = true;
> @@ -780,6 +802,9 @@ static void __update_extent_tree_range(struct inode *inode,
>   out_read_extent_cache:
>          write_unlock(&et->lock);
> 
> +       if (is_inode_flag_set(inode, FI_NO_EXTENT))
> +               __destroy_extent_node(inode, EX_READ);
> +
>          if (updated)
>                  f2fs_mark_inode_dirty_sync(inode, true);  } @@ -942,10 +967,14 @@ static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink
>          list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
>                  if (atomic_read(&et->node_cnt)) {
>                          write_lock(&et->lock);
> -                       node_cnt += __free_extent_tree(sbi, et);
> +                       node_cnt += __free_extent_tree(sbi, et,
> +                                       nr_shrink - node_cnt -
> + tree_cnt);
>                          write_unlock(&et->lock);
>                  }
> -               f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
> +
> +               if (atomic_read(&et->node_cnt))
> +                       goto unlock_out;
> +
>                  list_del_init(&et->list);
>                  radix_tree_delete(&eti->extent_tree_root, et->ino);
>                  kmem_cache_free(extent_tree_slab, et); @@ -1084,23 +1113,6 @@ unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink
>          return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);  }
> 
> -static unsigned int __destroy_extent_node(struct inode *inode,
> -                                       enum extent_type type)
> -{
> -       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> -       struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
> -       unsigned int node_cnt = 0;
> -
> -       if (!et || !atomic_read(&et->node_cnt))
> -               return 0;
> -
> -       write_lock(&et->lock);
> -       node_cnt = __free_extent_tree(sbi, et);
> -       write_unlock(&et->lock);
> -
> -       return node_cnt;
> -}
> -
>   void f2fs_destroy_extent_node(struct inode *inode)  {
>          __destroy_extent_node(inode, EX_READ); @@ -1109,7 +1121,6 @@ void f2fs_destroy_extent_node(struct inode *inode)
> 
>   static void __drop_extent_tree(struct inode *inode, enum extent_type type)  {
> -       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>          struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
>          bool updated = false;
> 
> @@ -1117,7 +1128,6 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
>                  return;
> 
>          write_lock(&et->lock);
> -       __free_extent_tree(sbi, et);
>          if (type == EX_READ) {
>                  set_inode_flag(inode, FI_NO_EXTENT);
>                  if (et->largest.len) {
> @@ -1126,6 +1136,9 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
>                  }
>          }
>          write_unlock(&et->lock);
> +
> +       __destroy_extent_node(inode, type);
> +
>          if (updated)
>                  f2fs_mark_inode_dirty_sync(inode, true);  }
> --
> 2.40.1
> 



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-11-25 11:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-22  6:50 [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches Chao Yu via Linux-f2fs-devel
2024-11-22  6:50 ` [f2fs-dev] [PATCH v2 2/2] f2fs: add a sysfs node to limit max read extent count per-inode Chao Yu via Linux-f2fs-devel
2024-11-23 15:50 ` [f2fs-dev] [PATCH v2 1/2] f2fs: fix to shrink read extent node in batches patchwork-bot+f2fs--- via Linux-f2fs-devel
2024-11-25  3:11 ` [f2fs-dev] 答复: " 王秀红 (Xiuhong Wang)
2024-11-25 11:13   ` Chao Yu via Linux-f2fs-devel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).