public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
@ 2026-04-14 10:02 Baolin Liu
  2026-04-14 10:07 ` liubaolin
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Baolin Liu @ 2026-04-14 10:02 UTC (permalink / raw)
  To: tytso, adilger.kernel
  Cc: liubaolin12138, linux-ext4, linux-kernel, wangguanyu, Baolin Liu

From: Baolin Liu <liubaolin@kylinos.cn>

Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
runtime statistics. This makes it easier to inspect allocator
activity for a specific workload instead of using counters
accumulated since mount.

Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
---
 fs/ext4/ext4.h    |  1 +
 fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
 fs/ext4/sysfs.c   | 24 ++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7617e2d454ea..3a32e1a515dd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
 extern const struct seq_operations ext4_mb_seq_groups_ops;
 extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
 extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
+extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
 extern int ext4_mb_init(struct super_block *);
 extern void ext4_mb_release(struct super_block *);
 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bb58eafb87bc..382c91586b26 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
 	}
 	seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
 	seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
+	seq_printf(seq, "\tblocks_allocated: %u\n",
+		   atomic_read(&sbi->s_bal_allocated));
 
 	seq_printf(seq, "\tgroups_scanned: %u\n",
 		   atomic_read(&sbi->s_bal_groups_scanned));
@@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
 		trace_ext4_mballoc_prealloc(ac);
 }
 
+void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
+{
+	int i;
+
+	atomic_set(&sbi->s_bal_reqs, 0);
+	atomic_set(&sbi->s_bal_success, 0);
+	atomic_set(&sbi->s_bal_allocated, 0);
+	atomic_set(&sbi->s_bal_groups_scanned, 0);
+
+	for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
+		atomic64_set(&sbi->s_bal_cX_hits[i], 0);
+		atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
+		atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
+		atomic64_set(&sbi->s_bal_cX_failed[i], 0);
+	}
+
+	atomic_set(&sbi->s_bal_ex_scanned, 0);
+	atomic_set(&sbi->s_bal_goals, 0);
+	atomic_set(&sbi->s_bal_stream_goals, 0);
+	atomic_set(&sbi->s_bal_len_goals, 0);
+	atomic_set(&sbi->s_bal_2orders, 0);
+	atomic_set(&sbi->s_bal_breaks, 0);
+	atomic_set(&sbi->s_mb_lost_chunks, 0);
+	atomic_set(&sbi->s_mb_buddies_generated, 0);
+	atomic64_set(&sbi->s_mb_generation_time, 0);
+	atomic_set(&sbi->s_mb_preallocated, 0);
+	atomic_set(&sbi->s_mb_discarded, 0);
+}
+
 /*
  * Called on failure; free up any blocks from the inode PA for this
  * context.  We don't need this for MB_GROUP_PA because we only change
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 923b375e017f..a5bd88a99f22 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -41,6 +41,7 @@ typedef enum {
 	attr_pointer_atomic,
 	attr_journal_task,
 	attr_err_report_sec,
+	attr_mb_stats_clear,
 } attr_id_t;
 
 typedef enum {
@@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
 	return count;
 }
 
+static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
+				    const char *buf, size_t count)
+{
+	int val;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = kstrtoint(skip_spaces(buf), 0, &val);
+	if (ret)
+		return ret;
+	if (val != 1)
+		return -EINVAL;
+
+	ext4_mb_stats_clear(sbi);
+	return count;
+}
+
 static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
 {
 	if (!sbi->s_journal)
@@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
 EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
@@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
+	ATTR_LIST(mb_stats_clear),
 	ATTR_LIST(mb_max_to_scan),
 	ATTR_LIST(mb_min_to_scan),
 	ATTR_LIST(mb_order2_req),
@@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 		return trigger_test_error(sbi, buf, len);
 	case attr_err_report_sec:
 		return err_report_sec_store(sbi, buf, len);
+	case attr_mb_stats_clear:
+		return mb_stats_clear_store(sbi, buf, len);
 	default:
 		return ext4_generic_attr_store(a, sbi, buf, len);
 	}
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
@ 2026-04-14 10:07 ` liubaolin
  2026-04-15 19:26 ` Ojaswin Mujoo
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-14 10:07 UTC (permalink / raw)
  To: tytso, adilger.kernel; +Cc: linux-ext4, linux-kernel, wangguanyu, Baolin Liu

> Dear all,
>   I have sent a small ext4 patch to add a manual reset capability for the mballoc statistics, and I would like to add some background on the motivation.
> 
>   The idea came mainly from XFS stats_clear.
>   ext4 already exports mballoc runtime statistics through /proc/fs/ext4/<dev>/mb_stats, 
>   but these counters keep accumulating from mount time, which makes it inconvenient when trying to observe allocator behavior for a single test run.
>                                                                                    
>   This patch adds a write-only sysfs node, /sys/fs/ext4/<dev>/mb_stats_clear, so that writing 1 to it resets the ext4 mballoc runtime statistics.
>   It also adds sbi->s_bal_allocated to /proc/fs/ext4/<dev>/mb_stats, 
>   so that the proc output matches the mballoc summary printed at unmount time and the set of counters covered by mb_stats_clear is more complete. 
>   
>   The main goal is to make it easier to observe allocator activity for a specific test run instead of relying on counters accumulated since mount. 
>   With this in place, the counters can be cleared before starting a test, and the resulting mb_stats output reflects only the activity generated by that test.
>   
>   The counters being cleared are runtime mballoc statistics used for /proc/fs/ext4/<dev>/mb_stats reporting and for the mballoc summary printed at unmount time. 
>   I did not find any cases where these fields are read back to drive ext4 behavior, so the reset only affects statistics reporting.
> 
>   For validation, /sys/fs/ext4/<dev>/mb_stats can be enabled first, 
>   then a file operation test can be run so that the relevant values in /proc/fs/ext4/<dev>/mb_stats become non-zero. 
>   After writing 1 to /sys/fs/ext4/<dev>/mb_stats_clear, those values should return to 0. 
>   Running another file operation test afterward should make those values increase again.
> 
>   Best regards,
>   Baolin Liu





在 2026/4/14 18:02, Baolin Liu 写道:
> From: Baolin Liu <liubaolin@kylinos.cn>
> 
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
> 
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
> ---
>   fs/ext4/ext4.h    |  1 +
>   fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
>   fs/ext4/sysfs.c   | 24 ++++++++++++++++++++++++
>   3 files changed, 56 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7617e2d454ea..3a32e1a515dd 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
>   extern const struct seq_operations ext4_mb_seq_groups_ops;
>   extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
>   extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
>   extern int ext4_mb_init(struct super_block *);
>   extern void ext4_mb_release(struct super_block *);
>   extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index bb58eafb87bc..382c91586b26 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
>   	}
>   	seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
>   	seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
> +	seq_printf(seq, "\tblocks_allocated: %u\n",
> +		   atomic_read(&sbi->s_bal_allocated));
>   
>   	seq_printf(seq, "\tgroups_scanned: %u\n",
>   		   atomic_read(&sbi->s_bal_groups_scanned));
> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
>   		trace_ext4_mballoc_prealloc(ac);
>   }
>   
> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
> +{
> +	int i;
> +
> +	atomic_set(&sbi->s_bal_reqs, 0);
> +	atomic_set(&sbi->s_bal_success, 0);
> +	atomic_set(&sbi->s_bal_allocated, 0);
> +	atomic_set(&sbi->s_bal_groups_scanned, 0);
> +
> +	for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
> +		atomic64_set(&sbi->s_bal_cX_hits[i], 0);
> +		atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
> +		atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
> +		atomic64_set(&sbi->s_bal_cX_failed[i], 0);
> +	}
> +
> +	atomic_set(&sbi->s_bal_ex_scanned, 0);
> +	atomic_set(&sbi->s_bal_goals, 0);
> +	atomic_set(&sbi->s_bal_stream_goals, 0);
> +	atomic_set(&sbi->s_bal_len_goals, 0);
> +	atomic_set(&sbi->s_bal_2orders, 0);
> +	atomic_set(&sbi->s_bal_breaks, 0);
> +	atomic_set(&sbi->s_mb_lost_chunks, 0);
> +	atomic_set(&sbi->s_mb_buddies_generated, 0);
> +	atomic64_set(&sbi->s_mb_generation_time, 0);
> +	atomic_set(&sbi->s_mb_preallocated, 0);
> +	atomic_set(&sbi->s_mb_discarded, 0);
> +}
> +
>   /*
>    * Called on failure; free up any blocks from the inode PA for this
>    * context.  We don't need this for MB_GROUP_PA because we only change
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 923b375e017f..a5bd88a99f22 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -41,6 +41,7 @@ typedef enum {
>   	attr_pointer_atomic,
>   	attr_journal_task,
>   	attr_err_report_sec,
> +	attr_mb_stats_clear,
>   } attr_id_t;
>   
>   typedef enum {
> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
>   	return count;
>   }
>   
> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
> +				    const char *buf, size_t count)
> +{
> +	int val;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	ret = kstrtoint(skip_spaces(buf), 0, &val);
> +	if (ret)
> +		return ret;
> +	if (val != 1)
> +		return -EINVAL;
> +
> +	ext4_mb_stats_clear(sbi);
> +	return count;
> +}
> +
>   static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
>   {
>   	if (!sbi->s_journal)
> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
>   EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
>   EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
>   EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
>   EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
>   EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
>   EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
>   	ATTR_LIST(inode_readahead_blks),
>   	ATTR_LIST(inode_goal),
>   	ATTR_LIST(mb_stats),
> +	ATTR_LIST(mb_stats_clear),
>   	ATTR_LIST(mb_max_to_scan),
>   	ATTR_LIST(mb_min_to_scan),
>   	ATTR_LIST(mb_order2_req),
> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
>   		return trigger_test_error(sbi, buf, len);
>   	case attr_err_report_sec:
>   		return err_report_sec_store(sbi, buf, len);
> +	case attr_mb_stats_clear:
> +		return mb_stats_clear_store(sbi, buf, len);
>   	default:
>   		return ext4_generic_attr_store(a, sbi, buf, len);
>   	}


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
  2026-04-14 10:07 ` liubaolin
@ 2026-04-15 19:26 ` Ojaswin Mujoo
  2026-04-16  7:07   ` liubaolin
  2026-04-16  1:14 ` Andreas Dilger
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: Ojaswin Mujoo @ 2026-04-15 19:26 UTC (permalink / raw)
  To: Baolin Liu
  Cc: tytso, adilger.kernel, linux-ext4, linux-kernel, wangguanyu,
	Baolin Liu

On Tue, Apr 14, 2026 at 06:02:11PM +0800, Baolin Liu wrote:
> From: Baolin Liu <liubaolin@kylinos.cn>
> 
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
> 
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>

The patch looks good to me, Baolin. We just need to document this knob
in the Documentation/ABI/testing/sysfs-fs-ext4 file so that users know
what it does and that the only value accepted on write is 1.

Regards,
ojaswin


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
  2026-04-14 10:07 ` liubaolin
  2026-04-15 19:26 ` Ojaswin Mujoo
@ 2026-04-16  1:14 ` Andreas Dilger
  2026-04-16  7:11   ` liubaolin
  2026-04-16  1:44 ` Ritesh Harjani
  2026-04-16  8:53 ` Zhang Yi
  4 siblings, 1 reply; 9+ messages in thread
From: Andreas Dilger @ 2026-04-16  1:14 UTC (permalink / raw)
  To: Baolin Liu; +Cc: tytso, linux-ext4, linux-kernel, wangguanyu, Baolin Liu

On Apr 14, 2026, at 04:02, Baolin Liu <liubaolin12138@163.com> wrote:
> 
> From: Baolin Liu <liubaolin@kylinos.cn>
> 
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics. This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.

Rather than having a read-only "mb_stats" procfs file and a separate
write-only "mb_stats_clear" sysfs file to clear "mb_stats", IMHO it
would be more obvious to write directly to "/proc/fs/ext4/DEV/mb_stats"
file to clear it.  Writing "0" would be logical to zero out the stats.

Cheers, Andreas

> 
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
> ---
> fs/ext4/ext4.h    |  1 +
> fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
> fs/ext4/sysfs.c   | 24 ++++++++++++++++++++++++
> 3 files changed, 56 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7617e2d454ea..3a32e1a515dd 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
> extern const struct seq_operations ext4_mb_seq_groups_ops;
> extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
> extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
> extern int ext4_mb_init(struct super_block *);
> extern void ext4_mb_release(struct super_block *);
> extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index bb58eafb87bc..382c91586b26 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
> }
> seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
> seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
> + seq_printf(seq, "\tblocks_allocated: %u\n",
> +   atomic_read(&sbi->s_bal_allocated));
> 
> seq_printf(seq, "\tgroups_scanned: %u\n",
>   atomic_read(&sbi->s_bal_groups_scanned));
> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
> trace_ext4_mballoc_prealloc(ac);
> }
> 
> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
> +{
> + int i;
> +
> + atomic_set(&sbi->s_bal_reqs, 0);
> + atomic_set(&sbi->s_bal_success, 0);
> + atomic_set(&sbi->s_bal_allocated, 0);
> + atomic_set(&sbi->s_bal_groups_scanned, 0);
> +
> + for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
> + atomic64_set(&sbi->s_bal_cX_hits[i], 0);
> + atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
> + atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
> + atomic64_set(&sbi->s_bal_cX_failed[i], 0);
> + }
> +
> + atomic_set(&sbi->s_bal_ex_scanned, 0);
> + atomic_set(&sbi->s_bal_goals, 0);
> + atomic_set(&sbi->s_bal_stream_goals, 0);
> + atomic_set(&sbi->s_bal_len_goals, 0);
> + atomic_set(&sbi->s_bal_2orders, 0);
> + atomic_set(&sbi->s_bal_breaks, 0);
> + atomic_set(&sbi->s_mb_lost_chunks, 0);
> + atomic_set(&sbi->s_mb_buddies_generated, 0);
> + atomic64_set(&sbi->s_mb_generation_time, 0);
> + atomic_set(&sbi->s_mb_preallocated, 0);
> + atomic_set(&sbi->s_mb_discarded, 0);
> +}
> +
> /*
>  * Called on failure; free up any blocks from the inode PA for this
>  * context.  We don't need this for MB_GROUP_PA because we only change
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 923b375e017f..a5bd88a99f22 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -41,6 +41,7 @@ typedef enum {
> attr_pointer_atomic,
> attr_journal_task,
> attr_err_report_sec,
> + attr_mb_stats_clear,
> } attr_id_t;
> 
> typedef enum {
> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
> return count;
> }
> 
> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
> +    const char *buf, size_t count)
> +{
> + int val;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + ret = kstrtoint(skip_spaces(buf), 0, &val);
> + if (ret)
> + return ret;
> + if (val != 1)
> + return -EINVAL;
> +
> + ext4_mb_stats_clear(sbi);
> + return count;
> +}
> +
> static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
> {
> if (!sbi->s_journal)
> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
> EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
> EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
> EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
> EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
> EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(inode_readahead_blks),
> ATTR_LIST(inode_goal),
> ATTR_LIST(mb_stats),
> + ATTR_LIST(mb_stats_clear),
> ATTR_LIST(mb_max_to_scan),
> ATTR_LIST(mb_min_to_scan),
> ATTR_LIST(mb_order2_req),
> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
> return trigger_test_error(sbi, buf, len);
> case attr_err_report_sec:
> return err_report_sec_store(sbi, buf, len);
> + case attr_mb_stats_clear:
> + return mb_stats_clear_store(sbi, buf, len);
> default:
> return ext4_generic_attr_store(a, sbi, buf, len);
> }
> -- 
> 2.51.0
> 


Cheers, Andreas






^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
                   ` (2 preceding siblings ...)
  2026-04-16  1:14 ` Andreas Dilger
@ 2026-04-16  1:44 ` Ritesh Harjani
  2026-04-16  7:16   ` liubaolin
  2026-04-16  8:53 ` Zhang Yi
  4 siblings, 1 reply; 9+ messages in thread
From: Ritesh Harjani @ 2026-04-16  1:44 UTC (permalink / raw)
  To: Baolin Liu, tytso, adilger.kernel
  Cc: liubaolin12138, linux-ext4, linux-kernel, wangguanyu, Baolin Liu

Baolin Liu <liubaolin12138@163.com> writes:

> From: Baolin Liu <liubaolin@kylinos.cn>
>
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics.This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
>
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>

Makes sense to me. The changes look good and work fine on my end.
So please feel free to add:

Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-15 19:26 ` Ojaswin Mujoo
@ 2026-04-16  7:07   ` liubaolin
  0 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-16  7:07 UTC (permalink / raw)
  To: Ojaswin Mujoo
  Cc: tytso, adilger.kernel, linux-ext4, linux-kernel, wangguanyu,
	Baolin Liu

> Dear ojaswin,
>    Alright, thank you for your review. I will revise the patch according to Andreas's comments and submit a second version. 
>    When submitting the second version, I will also update the relevant files under the Documentation directory according to your suggestions to add explanations.
> 
> Regards,
> Baolin



在 2026/4/16 3:26, Ojaswin Mujoo 写道:
> On Tue, Apr 14, 2026 at 06:02:11PM +0800, Baolin Liu wrote:
>> From: Baolin Liu <liubaolin@kylinos.cn>
>>
>> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
>> runtime statistics.This makes it easier to inspect allocator
>> activity for a specific workload instead of using counters
>> accumulated since mount.
>>
>> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
> 
> The patch looks good to me Baolin. We just need to add documentation of
> this to the Documentation/ABI/testing/sysfs-fs-ext4 file so that the
> users know what it is and the fact that the only value we allow to write
> is 1.
> 
> Regards,
> ojaswin


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-16  1:14 ` Andreas Dilger
@ 2026-04-16  7:11   ` liubaolin
  0 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-16  7:11 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: tytso, linux-ext4, linux-kernel, wangguanyu, Baolin Liu

> Dear Andreas,
>    Alright, thank you for your review. 
>    I will revise the patch according to your suggestions and submit a second version as soon as possible.
> 
> Regards,
> Baolin



在 2026/4/16 9:14, Andreas Dilger 写道:
> On Apr 14, 2026, at 04:02, Baolin Liu <liubaolin12138@163.com> wrote:
>>
>> From: Baolin Liu <liubaolin@kylinos.cn>
>>
>> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
>> runtime statistics. This makes it easier to inspect allocator
>> activity for a specific workload instead of using counters
>> accumulated since mount.
> 
> Rather than having a read-only "mb_stats" procfs file and a separate
> write-only "mb_stats_clear" sysfs file to clear "mb_stats", IMHO it
> would be more obvious to write directly to "/proc/fs/ext4/DEV/mb_stats"
> file to clear it.  Writing "0" would be logical to zero out the stats.
> 
> Cheers, Andreas
> 
>>
>> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
>> ---
>> fs/ext4/ext4.h    |  1 +
>> fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
>> fs/ext4/sysfs.c   | 24 ++++++++++++++++++++++++
>> 3 files changed, 56 insertions(+)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 7617e2d454ea..3a32e1a515dd 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
>> extern const struct seq_operations ext4_mb_seq_groups_ops;
>> extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
>> extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
>> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
>> extern int ext4_mb_init(struct super_block *);
>> extern void ext4_mb_release(struct super_block *);
>> extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index bb58eafb87bc..382c91586b26 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
>> }
>> seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
>> seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
>> + seq_printf(seq, "\tblocks_allocated: %u\n",
>> +   atomic_read(&sbi->s_bal_allocated));
>>
>> seq_printf(seq, "\tgroups_scanned: %u\n",
>>    atomic_read(&sbi->s_bal_groups_scanned));
>> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
>> trace_ext4_mballoc_prealloc(ac);
>> }
>>
>> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
>> +{
>> + int i;
>> +
>> + atomic_set(&sbi->s_bal_reqs, 0);
>> + atomic_set(&sbi->s_bal_success, 0);
>> + atomic_set(&sbi->s_bal_allocated, 0);
>> + atomic_set(&sbi->s_bal_groups_scanned, 0);
>> +
>> + for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
>> + atomic64_set(&sbi->s_bal_cX_hits[i], 0);
>> + atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
>> + atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
>> + atomic64_set(&sbi->s_bal_cX_failed[i], 0);
>> + }
>> +
>> + atomic_set(&sbi->s_bal_ex_scanned, 0);
>> + atomic_set(&sbi->s_bal_goals, 0);
>> + atomic_set(&sbi->s_bal_stream_goals, 0);
>> + atomic_set(&sbi->s_bal_len_goals, 0);
>> + atomic_set(&sbi->s_bal_2orders, 0);
>> + atomic_set(&sbi->s_bal_breaks, 0);
>> + atomic_set(&sbi->s_mb_lost_chunks, 0);
>> + atomic_set(&sbi->s_mb_buddies_generated, 0);
>> + atomic64_set(&sbi->s_mb_generation_time, 0);
>> + atomic_set(&sbi->s_mb_preallocated, 0);
>> + atomic_set(&sbi->s_mb_discarded, 0);
>> +}
>> +
>> /*
>>   * Called on failure; free up any blocks from the inode PA for this
>>   * context.  We don't need this for MB_GROUP_PA because we only change
>> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
>> index 923b375e017f..a5bd88a99f22 100644
>> --- a/fs/ext4/sysfs.c
>> +++ b/fs/ext4/sysfs.c
>> @@ -41,6 +41,7 @@ typedef enum {
>> attr_pointer_atomic,
>> attr_journal_task,
>> attr_err_report_sec,
>> + attr_mb_stats_clear,
>> } attr_id_t;
>>
>> typedef enum {
>> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
>> return count;
>> }
>>
>> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
>> +    const char *buf, size_t count)
>> +{
>> + int val;
>> + int ret;
>> +
>> + if (!capable(CAP_SYS_ADMIN))
>> + return -EPERM;
>> +
>> + ret = kstrtoint(skip_spaces(buf), 0, &val);
>> + if (ret)
>> + return ret;
>> + if (val != 1)
>> + return -EINVAL;
>> +
>> + ext4_mb_stats_clear(sbi);
>> + return count;
>> +}
>> +
>> static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
>> {
>> if (!sbi->s_journal)
>> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
>> EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
>> EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
>> EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
>> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
>> EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
>> EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
>> EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
>> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
>> ATTR_LIST(inode_readahead_blks),
>> ATTR_LIST(inode_goal),
>> ATTR_LIST(mb_stats),
>> + ATTR_LIST(mb_stats_clear),
>> ATTR_LIST(mb_max_to_scan),
>> ATTR_LIST(mb_min_to_scan),
>> ATTR_LIST(mb_order2_req),
>> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
>> return trigger_test_error(sbi, buf, len);
>> case attr_err_report_sec:
>> return err_report_sec_store(sbi, buf, len);
>> + case attr_mb_stats_clear:
>> + return mb_stats_clear_store(sbi, buf, len);
>> default:
>> return ext4_generic_attr_store(a, sbi, buf, len);
>> }
>> -- 
>> 2.51.0
>>
> 
> 
> Cheers, Andreas
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-16  1:44 ` Ritesh Harjani
@ 2026-04-16  7:16   ` liubaolin
  0 siblings, 0 replies; 9+ messages in thread
From: liubaolin @ 2026-04-16  7:16 UTC (permalink / raw)
  To: Ritesh Harjani (IBM), tytso, adilger.kernel
  Cc: linux-ext4, linux-kernel, wangguanyu, Baolin Liu

> Dear Ritesh,
> Thank you for your review. I will revise the patch according to Andreas's comments and submit a second version. 
> When the second version is submitted, you are welcome to review it. 
> Once the patch is finalized, the maintainer will add the Reviewed-by tag.
> 
> Regards,
> Baolin



在 2026/4/16 9:44, Ritesh Harjani (IBM) 写道:
> Baolin Liu <liubaolin12138@163.com> writes:
> 
>> From: Baolin Liu <liubaolin@kylinos.cn>
>>
>> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
>> runtime statistics.This makes it easier to inspect allocator
>> activity for a specific workload instead of using counters
>> accumulated since mount.
>>
>> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>
> 
> Make sense to me. The changes looks good and works fine at my end.
> So please feel free to add:
> 
> Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1] ext4: add mb_stats_clear for mballoc statistics
  2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
                   ` (3 preceding siblings ...)
  2026-04-16  1:44 ` Ritesh Harjani
@ 2026-04-16  8:53 ` Zhang Yi
  4 siblings, 0 replies; 9+ messages in thread
From: Zhang Yi @ 2026-04-16  8:53 UTC (permalink / raw)
  To: Baolin Liu, tytso, adilger.kernel
  Cc: linux-ext4, linux-kernel, wangguanyu, Baolin Liu

On 4/14/2026 6:02 PM, Baolin Liu wrote:
> From: Baolin Liu <liubaolin@kylinos.cn>
> 
> Add a write-only mb_stats_clear sysfs knob to reset ext4 mballoc
> runtime statistics. This makes it easier to inspect allocator
> activity for a specific workload instead of using counters
> accumulated since mount.
> 
> Signed-off-by: Baolin Liu <liubaolin@kylinos.cn>

Looks good to me!

Reviewed-by: Zhang Yi <yi.zhang@huawei.com>

> ---
>  fs/ext4/ext4.h    |  1 +
>  fs/ext4/mballoc.c | 31 +++++++++++++++++++++++++++++++
>  fs/ext4/sysfs.c   | 24 ++++++++++++++++++++++++
>  3 files changed, 56 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7617e2d454ea..3a32e1a515dd 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2995,6 +2995,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
>  extern const struct seq_operations ext4_mb_seq_groups_ops;
>  extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
>  extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
> +extern void ext4_mb_stats_clear(struct ext4_sb_info *sbi);
>  extern int ext4_mb_init(struct super_block *);
>  extern void ext4_mb_release(struct super_block *);
>  extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index bb58eafb87bc..382c91586b26 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3219,6 +3219,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
>  	}
>  	seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
>  	seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
> +	seq_printf(seq, "\tblocks_allocated: %u\n",
> +		   atomic_read(&sbi->s_bal_allocated));
>  
>  	seq_printf(seq, "\tgroups_scanned: %u\n",
>  		   atomic_read(&sbi->s_bal_groups_scanned));
> @@ -4721,6 +4723,35 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
>  		trace_ext4_mballoc_prealloc(ac);
>  }
>  
> +void ext4_mb_stats_clear(struct ext4_sb_info *sbi)
> +{
> +	int i;
> +
> +	atomic_set(&sbi->s_bal_reqs, 0);
> +	atomic_set(&sbi->s_bal_success, 0);
> +	atomic_set(&sbi->s_bal_allocated, 0);
> +	atomic_set(&sbi->s_bal_groups_scanned, 0);
> +
> +	for (i = 0; i < EXT4_MB_NUM_CRS; i++) {
> +		atomic64_set(&sbi->s_bal_cX_hits[i], 0);
> +		atomic64_set(&sbi->s_bal_cX_groups_considered[i], 0);
> +		atomic_set(&sbi->s_bal_cX_ex_scanned[i], 0);
> +		atomic64_set(&sbi->s_bal_cX_failed[i], 0);
> +	}
> +
> +	atomic_set(&sbi->s_bal_ex_scanned, 0);
> +	atomic_set(&sbi->s_bal_goals, 0);
> +	atomic_set(&sbi->s_bal_stream_goals, 0);
> +	atomic_set(&sbi->s_bal_len_goals, 0);
> +	atomic_set(&sbi->s_bal_2orders, 0);
> +	atomic_set(&sbi->s_bal_breaks, 0);
> +	atomic_set(&sbi->s_mb_lost_chunks, 0);
> +	atomic_set(&sbi->s_mb_buddies_generated, 0);
> +	atomic64_set(&sbi->s_mb_generation_time, 0);
> +	atomic_set(&sbi->s_mb_preallocated, 0);
> +	atomic_set(&sbi->s_mb_discarded, 0);
> +}
> +
>  /*
>   * Called on failure; free up any blocks from the inode PA for this
>   * context.  We don't need this for MB_GROUP_PA because we only change
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 923b375e017f..a5bd88a99f22 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -41,6 +41,7 @@ typedef enum {
>  	attr_pointer_atomic,
>  	attr_journal_task,
>  	attr_err_report_sec,
> +	attr_mb_stats_clear,
>  } attr_id_t;
>  
>  typedef enum {
> @@ -161,6 +162,25 @@ static ssize_t err_report_sec_store(struct ext4_sb_info *sbi,
>  	return count;
>  }
>  
> +static ssize_t mb_stats_clear_store(struct ext4_sb_info *sbi,
> +				    const char *buf, size_t count)
> +{
> +	int val;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	ret = kstrtoint(skip_spaces(buf), 0, &val);
> +	if (ret)
> +		return ret;
> +	if (val != 1)
> +		return -EINVAL;
> +
> +	ext4_mb_stats_clear(sbi);
> +	return count;
> +}
> +
>  static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
>  {
>  	if (!sbi->s_journal)
> @@ -251,6 +271,7 @@ EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
>  EXT4_ATTR_OFFSET(err_report_sec, 0644, err_report_sec, ext4_sb_info, s_err_report_sec);
>  EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
>  EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
> +EXT4_ATTR(mb_stats_clear, 0200, mb_stats_clear);
>  EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
>  EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
>  EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> @@ -301,6 +322,7 @@ static struct attribute *ext4_attrs[] = {
>  	ATTR_LIST(inode_readahead_blks),
>  	ATTR_LIST(inode_goal),
>  	ATTR_LIST(mb_stats),
> +	ATTR_LIST(mb_stats_clear),
>  	ATTR_LIST(mb_max_to_scan),
>  	ATTR_LIST(mb_min_to_scan),
>  	ATTR_LIST(mb_order2_req),
> @@ -561,6 +583,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
>  		return trigger_test_error(sbi, buf, len);
>  	case attr_err_report_sec:
>  		return err_report_sec_store(sbi, buf, len);
> +	case attr_mb_stats_clear:
> +		return mb_stats_clear_store(sbi, buf, len);
>  	default:
>  		return ext4_generic_attr_store(a, sbi, buf, len);
>  	}


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-04-16  8:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-14 10:02 [PATCH v1] ext4: add mb_stats_clear for mballoc statistics Baolin Liu
2026-04-14 10:07 ` liubaolin
2026-04-15 19:26 ` Ojaswin Mujoo
2026-04-16  7:07   ` liubaolin
2026-04-16  1:14 ` Andreas Dilger
2026-04-16  7:11   ` liubaolin
2026-04-16  1:44 ` Ritesh Harjani
2026-04-16  7:16   ` liubaolin
2026-04-16  8:53 ` Zhang Yi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox